/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 26e7fa26ff6a485bf12e845bcb80b878de68671e) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "../src/mat/impls/sbaij/mpi/mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"
#include "petscblaslapack.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar[]);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
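
/*
   Usage sketch (an illustration added for exposition, not part of the
   original source): MatStoreValues()/MatRetrieveValues() snapshot and
   restore the numerical values of a matrix whose nonzero pattern is
   frozen, e.g. to retry a factorization with shifted values. For
   MPISBAIJ both calls simply forward to the sequential diagonal (A)
   and off-diagonal (B) blocks above.

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);     // save current values
     // ... modify the values of mat in place ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);  // restore the saved values
*/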
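
/*
   The two macros below insert (or add) one scalar into the local diagonal
   block A (SeqSBAIJ) or off-diagonal block B (SeqBAIJ). Each locates the
   target block column within the row by a short binary search that narrows
   the window to at most three entries, followed by a linear scan; if the
   block is absent and new nonzeros are allowed (nonew == 0), the row is
   reallocated as needed and the later blocks are shifted up one slot.
*/
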
#define MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}

#define MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc = PETSC_NULL;
  MatScalar      *v_loc = PETSC_NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
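
/*
   Calling sketch (an assumed standalone example, not part of this file):
   only entries whose BLOCK row is <= the block column may be set; lower
   triangular entries are silently dropped once MAT_IGNORE_LOWER_TRIANGULAR
   is set, and raise an error otherwise.

     PetscInt    i = 0, j = 2;           // global indices with i <= j
     PetscScalar v = 1.0;
     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/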

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*)v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*)v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
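
/*
   Blocked insertion sketch (assumed example): im[] and in[] are BLOCK
   indices and v supplies bs*bs values per block, so with bs = 2 one
   upper-triangular block (I,J), I <= J, is set by

     PetscInt    I = 0, J = 1;
     PetscScalar v[4] = {1.0, 0.0,
                         0.0, 1.0};      // one row-oriented 2x2 block
     ierr = MatSetValuesBlocked(mat,1,&I,1,&J,v,INSERT_VALUES);CHKERRQ(ierr);
*/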

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
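
/*
   Retrieval sketch (assumed example): as the error branch above indicates,
   only rows owned by the calling process can be queried, so callers
   normally restrict themselves to the local ownership range:

     PetscInt    rstart,rend,row,col;
     PetscScalar val;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart; col = rstart;        // a locally owned (row,col)
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/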

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;             /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
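
/*
   Note on the Frobenius branch above: only the upper triangle is stored,
   so with the locally computed squares of norm(A) and norm(B) summed
   across processes into sum[0] and sum[1], the norm of the full symmetric
   matrix M is

     ||M||_F = sqrt(sum[0] + 2*sum[1])

   because every off-diagonal (B) block of the upper triangle has a mirror
   image in the lower triangle, while the SBAIJ norm of A is assumed to
   already account for the symmetry within the diagonal block.
*/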

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERTMODE or ADDMODE */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash && !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, so that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
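
/*
   Note on the two receive loops above: mat->stash carries single entries
   and mat->bstash carries bs*bs blocks (stashed column-oriented, hence the
   temporary roworiented = PETSC_FALSE). Each incoming message is drained
   by grouping consecutive entries with the same row index so that one
   MatSetValues(Blocked)_MPISBAIJ call handles each row segment.
*/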

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
             rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  if (baij->A) {ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
  if (baij->B) {ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
#if defined(PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
  if (baij->bb1)  {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
  if (baij->xx1)  {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
#if defined(PETSC_USE_SCALAR_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
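
/*
   Both MatMult variants above compute y = A_loc*x + B*x_ghost + B^T*x_loc
   with a single communication phase: slvec0 packs [x_loc; B^T x_loc] and
   slvec1 packs [A_loc x; 0], so the one forward ADD_VALUES scatter of
   slvec0 into slvec1 simultaneously gathers the needed ghost values of x
   into slvec1b and accumulates the B^T contributions into the owners'
   slvec1a; the final multadd with B then completes y. The Hermitian
   variant differs only in applying B's conjugate transpose.
*/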

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}
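
/*
   Row-access sketch (assumed example): since only the upper triangle is
   stored, row queries must be bracketed as below; MatGetRow_MPISBAIJ()
   then merges the A and B parts of the row into one column-sorted list.

     const PetscInt    *cols;
     const PetscScalar *vals;
     PetscInt          ncols;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     // ... use ncols, cols[], vals[] ...
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/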

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
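
/*
   Option sketch (assumed example): the SBAIJ-specific options handled
   above are typically set right after the matrix is created, e.g.

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetOption(mat,MAT_GETROW_UPPERTRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

   or via the command-line option -mat_ignore_lower_triangular; note that
   MAT_HERMITIAN may only be set once the matrix has been assembled.
*/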

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
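
/*
   Scaling note: with ll == rr == d the routine above forms M <- D*M*D,
   D = diag(d), which is the only diagonal scaling that preserves symmetry.
   The diagonal block is scaled on both sides with the local part of d,
   while the off-diagonal block gets its left factor locally and its right
   factor from the ghost values of d scattered into baij->lvec.
*/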

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatCopy_MPISBAIJ"
1345 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1346 {
1347   PetscErrorCode ierr;
1348   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1349   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1350 
1351   PetscFunctionBegin;
1352   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1353   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1354     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1355     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1356     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1357   } else {
1358     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1359     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1366 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1367 {
1368   PetscErrorCode ierr;
1369 
1370   PetscFunctionBegin;
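  /* the negative block size marks the value as a default that MatMPISBAIJSetPreallocation()
     may still override from the -mat_block_size option */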
1371   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1377 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1378 {
1379   PetscErrorCode ierr;
1380   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1381   PetscBLASInt   bnz,one=1;
1382   Mat_SeqSBAIJ   *xa,*ya;
1383   Mat_SeqBAIJ    *xb,*yb;
1384 
1385   PetscFunctionBegin;
1386   if (str == SAME_NONZERO_PATTERN) {
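    /* identical nonzero patterns: perform Y += a*X directly on the stored value arrays with BLAS */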
1387     PetscScalar alpha = a;
1388     xa = (Mat_SeqSBAIJ *)xx->A->data;
1389     ya = (Mat_SeqSBAIJ *)yy->A->data;
1390     bnz = PetscBLASIntCast(xa->nz);
1391     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1392     xb = (Mat_SeqBAIJ *)xx->B->data;
1393     yb = (Mat_SeqBAIJ *)yy->B->data;
1394     bnz = PetscBLASIntCast(xb->nz);
1395     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1396   } else {
1397     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1398     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1399     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1406 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1407 {
1408   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1409   PetscInt        rbs,cbs;
1410   PetscErrorCode  ierr;
1411 
1412   PetscFunctionBegin;
1413   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1414   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1415   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1416   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
  if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D but SBAIJ row block size is %D",bs,rbs);
  if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D but SBAIJ column block size is %D",bs,cbs);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1424 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1425 {
1426   PetscErrorCode ierr;
1427   PetscInt       i;
1428   PetscTruth     flg;
1429 
1430   PetscFunctionBegin;
1431   for (i=0; i<n; i++) {
1432     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1433     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1434   }
1435   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1436   PetscFunctionReturn(0);
1437 }
1438 
1439 
1440 /* -------------------------------------------------------------------*/
1441 static struct _MatOps MatOps_Values = {
1442        MatSetValues_MPISBAIJ,
1443        MatGetRow_MPISBAIJ,
1444        MatRestoreRow_MPISBAIJ,
1445        MatMult_MPISBAIJ,
1446 /* 4*/ MatMultAdd_MPISBAIJ,
1447        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1448        MatMultAdd_MPISBAIJ,
1449        0,
1450        0,
1451        0,
1452 /*10*/ 0,
1453        0,
1454        0,
1455        MatSOR_MPISBAIJ,
1456        MatTranspose_MPISBAIJ,
1457 /*15*/ MatGetInfo_MPISBAIJ,
1458        MatEqual_MPISBAIJ,
1459        MatGetDiagonal_MPISBAIJ,
1460        MatDiagonalScale_MPISBAIJ,
1461        MatNorm_MPISBAIJ,
1462 /*20*/ MatAssemblyBegin_MPISBAIJ,
1463        MatAssemblyEnd_MPISBAIJ,
1464        MatSetOption_MPISBAIJ,
1465        MatZeroEntries_MPISBAIJ,
1466 /*24*/ 0,
1467        0,
1468        0,
1469        0,
1470        0,
1471 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1472        0,
1473        0,
1474        0,
1475        0,
1476 /*34*/ MatDuplicate_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*39*/ MatAXPY_MPISBAIJ,
1482        MatGetSubMatrices_MPISBAIJ,
1483        MatIncreaseOverlap_MPISBAIJ,
1484        MatGetValues_MPISBAIJ,
1485        MatCopy_MPISBAIJ,
1486 /*44*/ 0,
1487        MatScale_MPISBAIJ,
1488        0,
1489        0,
1490        0,
1491 /*49*/ MatSetBlockSize_MPISBAIJ,
1492        0,
1493        0,
1494        0,
1495        0,
1496 /*54*/ 0,
1497        0,
1498        MatSetUnfactored_MPISBAIJ,
1499        0,
1500        MatSetValuesBlocked_MPISBAIJ,
1501 /*59*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*64*/ 0,
1507        0,
1508        0,
1509        0,
1510        0,
1511 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*74*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*79*/ 0,
1522        0,
1523        0,
1524        0,
1525        MatLoad_MPISBAIJ,
1526 /*84*/ 0,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*89*/ 0,
1532        0,
1533        0,
1534        0,
1535        0,
1536 /*94*/ 0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*99*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*104*/0,
1547        MatRealPart_MPISBAIJ,
1548        MatImaginaryPart_MPISBAIJ,
1549        MatGetRowUpperTriangular_MPISBAIJ,
1550        MatRestoreRowUpperTriangular_MPISBAIJ,
1551 /*109*/0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*114*/0,
1557        0,
1558        0,
1559        0,
1560        0,
1561 /*119*/0,
1562        0,
1563        0,
1564        0
1565 };
1566 
1567 
1568 EXTERN_C_BEGIN
1569 #undef __FUNCT__
1570 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1571 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1572 {
1573   PetscFunctionBegin;
1574   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1575   *iscopy = PETSC_FALSE;
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1583 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1584 {
1585   Mat_MPISBAIJ   *b;
1586   PetscErrorCode ierr;
1587   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1588 
1589   PetscFunctionBegin;
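  /* a negative bs is a default value rather than a user choice: query -mat_block_size
     before committing to it */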
1590   if (bs < 0){
1591     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
      ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1593     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1594     bs   = PetscAbs(bs);
1595   }
1596   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1597   bs = newbs;
1598 
1599   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1600   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1601   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1602   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1603 
1604   B->rmap->bs = B->cmap->bs = bs;
1605   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1606   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1607 
1608   if (d_nnz) {
1609     for (i=0; i<B->rmap->n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1611     }
1612   }
1613   if (o_nnz) {
1614     for (i=0; i<B->rmap->n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1616     }
1617   }
1618 
1619   b   = (Mat_MPISBAIJ*)B->data;
1620   mbs = B->rmap->n/bs;
1621   Mbs = B->rmap->N/bs;
1622   if (mbs*bs != B->rmap->n) {
    SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1624   }
1625 
1626   B->rmap->bs  = bs;
1627   b->bs2 = bs*bs;
1628   b->mbs = mbs;
1629   b->nbs = mbs;
1630   b->Mbs = Mbs;
1631   b->Nbs = Mbs;
1632 
1633   for (i=0; i<=b->size; i++) {
1634     b->rangebs[i] = B->rmap->range[i]/bs;
1635   }
1636   b->rstartbs = B->rmap->rstart/bs;
1637   b->rendbs   = B->rmap->rend/bs;
1638 
1639   b->cstartbs = B->cmap->rstart/bs;
1640   b->cendbs   = B->cmap->rend/bs;
1641 
1642   if (!B->preallocated) {
1643     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1644     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1645     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1646     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1647     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1648     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1649     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1650     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1651     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1652   }
1653 
1654   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1655   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1656   B->preallocated = PETSC_TRUE;
1657   PetscFunctionReturn(0);
1658 }
1659 EXTERN_C_END
1660 
1661 EXTERN_C_BEGIN
1662 #undef __FUNCT__
1663 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1664 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1665 {
1666   PetscInt       m,rstart,cstart,cend;
1667   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1668   const PetscInt *JJ=0;
1669   PetscScalar    *values=0;
1670   PetscErrorCode ierr;
1671 
1672   PetscFunctionBegin;
1673 
1674   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1675   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1676   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1677   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1678   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1679   m      = B->rmap->n/bs;
1680   rstart = B->rmap->rstart/bs;
1681   cstart = B->cmap->rstart/bs;
1682   cend   = B->cmap->rend/bs;
1683 
1684   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1685   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
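  /* for each block row, count in d_nnz[] the block columns that fall in [cstart,cend)
     (the diagonal portion); everything else in the row is charged to o_nnz[] */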
1686   for (i=0; i<m; i++) {
1687     nz = ii[i+1] - ii[i];
1688     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1689     nz_max = PetscMax(nz_max,nz);
1690     JJ  = jj + ii[i];
1691     for (j=0; j<nz; j++) {
1692       if (*JJ >= cstart) break;
1693       JJ++;
1694     }
1695     d = 0;
1696     for (; j<nz; j++) {
1697       if (*JJ++ >= cend) break;
1698       d++;
1699     }
1700     d_nnz[i] = d;
1701     o_nnz[i] = nz - d;
1702   }
1703   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1704   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1705 
1706   values = (PetscScalar*)V;
1707   if (!values) {
1708     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1709     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1710   }
1711   for (i=0; i<m; i++) {
1712     PetscInt          row    = i + rstart;
1713     PetscInt          ncols  = ii[i+1] - ii[i];
1714     const PetscInt    *icols = jj + ii[i];
1715     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1716     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1717   }
1718 
1719   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1720   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1721   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722 
1723   PetscFunctionReturn(0);
1724 }
1725 EXTERN_C_END
1726 
1727 EXTERN_C_BEGIN
1728 #if defined(PETSC_HAVE_MUMPS)
1729 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1730 #endif
1731 #if defined(PETSC_HAVE_SPOOLES)
1732 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1733 #endif
1734 #if defined(PETSC_HAVE_PASTIX)
1735 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1736 #endif
1737 EXTERN_C_END
1738 
1739 /*MC
1740    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1741    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1742 
1743    Options Database Keys:
1744 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1745 
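   Notes:
   A minimal usage sketch (illustrative only; m, M, bs, d_nz and o_nz are placeholders, and
   only entries in the upper triangular and diagonal portion may be set):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
   MatSetValues(A,...);
   MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
.ve
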
1746   Level: beginner
1747 
1748 .seealso: MatCreateMPISBAIJ
1749 M*/
1750 
1751 EXTERN_C_BEGIN
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatCreate_MPISBAIJ"
1754 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1755 {
1756   Mat_MPISBAIJ   *b;
1757   PetscErrorCode ierr;
1758   PetscTruth     flg;
1759 
1760   PetscFunctionBegin;
1761 
1762   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1763   B->data = (void*)b;
1764   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1765 
1766   B->ops->destroy    = MatDestroy_MPISBAIJ;
1767   B->ops->view       = MatView_MPISBAIJ;
1768   B->mapping         = 0;
1769   B->assembled       = PETSC_FALSE;
1770 
1771   B->insertmode = NOT_SET_VALUES;
1772   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1773   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1774 
1775   /* build local table of row and column ownerships */
1776   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1777 
1778   /* build cache for off array entries formed */
1779   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1780   b->donotstash  = PETSC_FALSE;
1781   b->colmap      = PETSC_NULL;
1782   b->garray      = PETSC_NULL;
1783   b->roworiented = PETSC_TRUE;
1784 
1785   /* stuff used in block assembly */
1786   b->barray       = 0;
1787 
1788   /* stuff used for matrix vector multiply */
1789   b->lvec         = 0;
1790   b->Mvctx        = 0;
1791   b->slvec0       = 0;
1792   b->slvec0b      = 0;
1793   b->slvec1       = 0;
1794   b->slvec1a      = 0;
1795   b->slvec1b      = 0;
1796   b->sMvctx       = 0;
1797 
1798   /* stuff for MatGetRow() */
1799   b->rowindices   = 0;
1800   b->rowvalues    = 0;
1801   b->getrowactive = PETSC_FALSE;
1802 
1803   /* hash table stuff */
1804   b->ht           = 0;
1805   b->hd           = 0;
1806   b->ht_size      = 0;
1807   b->ht_flag      = PETSC_FALSE;
1808   b->ht_fact      = 0;
1809   b->ht_total_ct  = 0;
1810   b->ht_insert_ct = 0;
1811 
1812   b->in_loc       = 0;
1813   b->v_loc        = 0;
1814   b->n_loc        = 0;
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1816     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1817     if (flg) {
1818       PetscReal fact = 1.39;
1819       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1820       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1821       if (fact <= 1.0) fact = 1.39;
1822       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1823       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1824     }
1825   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1826 
1827 #if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
                                     "MatGetFactor_mpisbaij_pastix",
                                     MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1831 #endif
1832 #if defined(PETSC_HAVE_MUMPS)
1833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1834                                      "MatGetFactor_sbaij_mumps",
1835                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1836 #endif
1837 #if defined(PETSC_HAVE_SPOOLES)
1838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1839                                      "MatGetFactor_mpisbaij_spooles",
1840                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1841 #endif
1842   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1843                                      "MatStoreValues_MPISBAIJ",
1844                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1846                                      "MatRetrieveValues_MPISBAIJ",
1847                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1849                                      "MatGetDiagonalBlock_MPISBAIJ",
1850                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1852                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1853                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1855                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1856                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1857   B->symmetric                  = PETSC_TRUE;
1858   B->structurally_symmetric     = PETSC_TRUE;
1859   B->symmetric_set              = PETSC_TRUE;
1860   B->structurally_symmetric_set = PETSC_TRUE;
1861   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1862   PetscFunctionReturn(0);
1863 }
1864 EXTERN_C_END
1865 
1866 /*MC
1867    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1868 
1869    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1870    and MATMPISBAIJ otherwise.
1871 
1872    Options Database Keys:
1873 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1874 
1875   Level: beginner
1876 
1877 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1878 M*/
1879 
1880 EXTERN_C_BEGIN
1881 #undef __FUNCT__
1882 #define __FUNCT__ "MatCreate_SBAIJ"
1883 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1884 {
1885   PetscErrorCode ierr;
1886   PetscMPIInt    size;
1887 
1888   PetscFunctionBegin;
1889   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1890   if (size == 1) {
1891     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1892   } else {
1893     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1894   }
1895   PetscFunctionReturn(0);
1896 }
1897 EXTERN_C_END
1898 
1899 #undef __FUNCT__
1900 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1901 /*@C
1902    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1903    the user should preallocate the matrix storage by setting the parameters
1904    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1905    performance can be increased by more than a factor of 50.
1906 
1907    Collective on Mat
1908 
1909    Input Parameters:
1910 +  A - the matrix
.  bs   - size of block
1912 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1913            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row), or PETSC_NULL.  You must
           leave room for the diagonal entry even if its value is zero.
1918 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1919            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.
1923 
1924 
1925    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use
1929 
1930    Notes:
1931 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1934 
1935    If the *_nnz parameter is given then the *_nz parameter is ignored
1936 
1937    Storage Information:
1938    For a square global matrix we define each processor's diagonal portion
1939    to be its local rows and the corresponding columns (a square submatrix);
1940    each processor's off-diagonal portion encompasses the remainder of the
1941    local matrix (a rectangular submatrix).
1942 
1943    The user can specify preallocated storage for the diagonal part of
1944    the local submatrix with either d_nz or d_nnz (not both).  Set
1945    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1946    memory allocation.  Likewise, specify preallocated storage for the
1947    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1948 
   You can call MatGetInfo() to get information on how effective the preallocation was,
   for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1951    You can also run with the option -info and look for messages with the string
1952    malloc in them to see if additional memory allocation was needed.
1953 
1954    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1955    the figure below we depict these three local rows and all columns (0-11).
1956 
1957 .vb
1958            0 1 2 3 4 5 6 7 8 9 10 11
1959           -------------------
1960    row 3  |  o o o d d d o o o o o o
1961    row 4  |  o o o d d d o o o o o o
1962    row 5  |  o o o d d d o o o o o o
1963           -------------------
1964 .ve
1965 
1966    Thus, any entries in the d locations are stored in the d (diagonal)
1967    submatrix, and any entries in the o locations are stored in the
1968    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1969    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1970 
1971    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1972    plus the diagonal part of the d matrix,
1973    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1974    In general, for PDE problems in which most nonzeros are near the diagonal,
1975    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1976    or you will get TERRIBLE performance; see the users' manual chapter on
1977    matrices.
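
   As a concrete sketch for the figure above (bs=1, assuming every marked position in the
   stored upper triangle is actually nonzero): the owner of rows 3-5 keeps d entries at
   columns 3-5 of row 3, columns 4-5 of row 4 and column 5 of row 5, plus o entries at
   columns 6-11 of each row, so d_nnz = {3,2,1} and o_nnz = {6,6,6} (or d_nz=3, o_nz=6).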
1978 
1979    Level: intermediate
1980 
1981 .keywords: matrix, block, aij, compressed row, sparse, parallel
1982 
1983 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1984 @*/
1985 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1986 {
1987   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1988 
1989   PetscFunctionBegin;
1990   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1991   if (f) {
1992     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1993   }
1994   PetscFunctionReturn(0);
1995 }
1996 
1997 #undef __FUNCT__
1998 #define __FUNCT__ "MatCreateMPISBAIJ"
1999 /*@C
2000    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2001    (block compressed row).  For good matrix assembly performance
2002    the user should preallocate the matrix storage by setting the parameters
2003    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2004    performance can be increased by more than a factor of 50.
2005 
2006    Collective on MPI_Comm
2007 
2008    Input Parameters:
2009 +  comm - MPI communicator
.  bs   - size of block
.  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
           This value should be the same as the local size used in creating the
           y vector for the matrix-vector product y = Ax.
.  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
           This value should be the same as the local size used in creating the
           x vector for the matrix-vector product y = Ax.
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2019 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2020            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row), or PETSC_NULL.
           You must leave room for the diagonal entry even if its value is zero.
2025 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2026            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row), or PETSC_NULL.
2030 
2031    Output Parameter:
2032 .  A - the matrix
2033 
2034    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqBAIJ format on one processor)
2040 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
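
   A sketch of that recommended sequence (all names are placeholders):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve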
2044 
2045    Notes:
2046    The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.
2048 
2049    The user MUST specify either the local or global matrix dimensions
2050    (possibly both).
2051 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
2054 
2055    If the *_nnz parameter is given then the *_nz parameter is ignored
2056 
2057    Storage Information:
2058    For a square global matrix we define each processor's diagonal portion
2059    to be its local rows and the corresponding columns (a square submatrix);
2060    each processor's off-diagonal portion encompasses the remainder of the
2061    local matrix (a rectangular submatrix).
2062 
2063    The user can specify preallocated storage for the diagonal part of
2064    the local submatrix with either d_nz or d_nnz (not both).  Set
2065    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2066    memory allocation.  Likewise, specify preallocated storage for the
2067    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2068 
2069    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2070    the figure below we depict these three local rows and all columns (0-11).
2071 
2072 .vb
2073            0 1 2 3 4 5 6 7 8 9 10 11
2074           -------------------
2075    row 3  |  o o o d d d o o o o o o
2076    row 4  |  o o o d d d o o o o o o
2077    row 5  |  o o o d d d o o o o o o
2078           -------------------
2079 .ve
2080 
2081    Thus, any entries in the d locations are stored in the d (diagonal)
2082    submatrix, and any entries in the o locations are stored in the
2083    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2084    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2085 
2086    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2087    plus the diagonal part of the d matrix,
2088    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2089    In general, for PDE problems in which most nonzeros are near the diagonal,
2090    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2091    or you will get TERRIBLE performance; see the users' manual chapter on
2092    matrices.
2093 
2094    Level: intermediate
2095 
2096 .keywords: matrix, block, aij, compressed row, sparse, parallel
2097 
2098 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2099 @*/
2100 
2101 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2102 {
2103   PetscErrorCode ierr;
2104   PetscMPIInt    size;
2105 
2106   PetscFunctionBegin;
2107   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2108   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2110   if (size > 1) {
2111     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2112     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2113   } else {
2114     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2115     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2116   }
2117   PetscFunctionReturn(0);
2118 }
2119 
2120 
2121 #undef __FUNCT__
2122 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2123 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2124 {
2125   Mat            mat;
2126   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2127   PetscErrorCode ierr;
2128   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2129   PetscScalar    *array;
2130 
2131   PetscFunctionBegin;
2132   *newmat       = 0;
2133   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2134   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2135   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2136   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2137   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2138   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2139 
2140   mat->factortype   = matin->factortype;
2141   mat->preallocated = PETSC_TRUE;
2142   mat->assembled    = PETSC_TRUE;
2143   mat->insertmode   = NOT_SET_VALUES;
2144 
2145   a = (Mat_MPISBAIJ*)mat->data;
2146   a->bs2   = oldmat->bs2;
2147   a->mbs   = oldmat->mbs;
2148   a->nbs   = oldmat->nbs;
2149   a->Mbs   = oldmat->Mbs;
2150   a->Nbs   = oldmat->Nbs;
2151 
2152 
2153   a->size         = oldmat->size;
2154   a->rank         = oldmat->rank;
2155   a->donotstash   = oldmat->donotstash;
2156   a->roworiented  = oldmat->roworiented;
2157   a->rowindices   = 0;
2158   a->rowvalues    = 0;
2159   a->getrowactive = PETSC_FALSE;
2160   a->barray       = 0;
2161   a->rstartbs    = oldmat->rstartbs;
2162   a->rendbs      = oldmat->rendbs;
2163   a->cstartbs    = oldmat->cstartbs;
2164   a->cendbs      = oldmat->cendbs;
2165 
2166   /* hash table stuff */
2167   a->ht           = 0;
2168   a->hd           = 0;
2169   a->ht_size      = 0;
2170   a->ht_flag      = oldmat->ht_flag;
2171   a->ht_fact      = oldmat->ht_fact;
2172   a->ht_total_ct  = 0;
2173   a->ht_insert_ct = 0;
2174 
2175   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2176   if (oldmat->colmap) {
2177 #if defined (PETSC_USE_CTABLE)
2178     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2179 #else
2180     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2181     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2182     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2183 #endif
2184   } else a->colmap = 0;
2185 
2186   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2187     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2188     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2189     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2190   } else a->garray = 0;
2191 
2192   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2193   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2194   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2195   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2196   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2197 
2198   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2199   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2200   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2201   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2202 
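  /* slvec1a/slvec1b (and slvec0b) alias the storage of slvec1 (and slvec0): the first
     bs*mbs entries form the local part and the remaining nt-bs*mbs entries the ghost part */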
2203   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2204   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2205   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2206   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2207   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2208   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2209   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2210   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2211   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2212   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2213   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2214   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2215   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2216 
  /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet; instead, share the old scatter by taking a reference: */
2218   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2219   a->sMvctx = oldmat->sMvctx;
2220   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2221 
2222   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2223   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2224   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2225   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2226   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2227   *newmat = mat;
2228   PetscFunctionReturn(0);
2229 }
2230 
2231 #undef __FUNCT__
2232 #define __FUNCT__ "MatLoad_MPISBAIJ"
2233 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2234 {
2235   PetscErrorCode ierr;
2236   PetscInt       i,nz,j,rstart,rend;
2237   PetscScalar    *vals,*buf;
2238   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2239   MPI_Status     status;
2240   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2241   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2242   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2243   PetscInt       bs=1,Mbs,mbs,extra_rows;
2244   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2245   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2246   int            fd;
2247 
2248   PetscFunctionBegin;
  ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2250     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2251   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2252 
2253   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2254   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2255   if (!rank) {
2256     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2257     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2258     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2259     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2260   }
2261 
2262   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2263 
2264   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2265   M = header[1]; N = header[2];
2266 
2267   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2268   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2269   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2270 
2271   /* If global sizes are set, check if they are consistent with that given in the file */
2272   if (sizesset) {
2273     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2274   }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2277 
2278   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2279 
2280   /*
2281      This code adds extra rows to make sure the number of rows is
2282      divisible by the blocksize
2283   */
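  /* illustration: M=10 with bs=4 gives Mbs=3 and extra_rows=2; the two padded rows receive
     a single entry of 1.0 on the diagonal further below */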
2284   Mbs        = M/bs;
2285   extra_rows = bs - M + bs*(Mbs);
2286   if (extra_rows == bs) extra_rows = 0;
2287   else                  Mbs++;
  if (extra_rows && !rank) {
2289     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2290   }
2291 
2292   /* determine ownership of all rows */
2293   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2294     mbs        = Mbs/size + ((Mbs % size) > rank);
2295     m          = mbs*bs;
2296   } else { /* User Set */
2297     m          = newmat->rmap->n;
2298     mbs        = m/bs;
2299   }
2300   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2301   mmbs       = PetscMPIIntCast(mbs);
2302   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2303   rowners[0] = 0;
2304   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2305   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2306   rstart = rowners[rank];
2307   rend   = rowners[rank+1];
2308 
2309   /* distribute row lengths to all processors */
2310   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2311   if (!rank) {
2312     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2313     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2314     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2315     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2316     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2317     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2318     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2319   } else {
2320     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2321   }
2322 
2323   if (!rank) {   /* procs[0] */
2324     /* calculate the number of nonzeros on each processor */
2325     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2326     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2327     for (i=0; i<size; i++) {
2328       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2329         procsnz[i] += rowlengths[j];
2330       }
2331     }
2332     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2333 
2334     /* determine max buffer needed and allocate it */
2335     maxnz = 0;
2336     for (i=0; i<size; i++) {
2337       maxnz = PetscMax(maxnz,procsnz[i]);
2338     }
2339     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2340 
2341     /* read in my part of the matrix column indices  */
2342     nz     = procsnz[0];
2343     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2344     mycols = ibuf;
2345     if (size == 1)  nz -= extra_rows;
2346     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2347     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2348 
    /* read in the column indices for all other processors (except the last) and ship them off */
2350     for (i=1; i<size-1; i++) {
2351       nz   = procsnz[i];
2352       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2353       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2354     }
    /* read in the column indices for the last proc */
2356     if (size != 1) {
2357       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2358       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2359       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2360       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2361     }
2362     ierr = PetscFree(cols);CHKERRQ(ierr);
2363   } else {  /* procs[i], i>0 */
2364     /* determine buffer space needed for message */
2365     nz = 0;
2366     for (i=0; i<m; i++) {
2367       nz += locrowlens[i];
2368     }
2369     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2370     mycols = ibuf;
    /* receive message of column indices */
2372     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2373     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2375   }
2376 
2377   /* loop over local rows, determining number of off diagonal entries */
2378   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2379   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2380   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2381   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2382   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2383   rowcount = 0;
2384   nzcount  = 0;
2385   for (i=0; i<mbs; i++) {
2386     dcount  = 0;
2387     odcount = 0;
2388     for (j=0; j<bs; j++) {
2389       kmax = locrowlens[rowcount];
2390       for (k=0; k<kmax; k++) {
2391         tmp = mycols[nzcount++]/bs; /* block col. index */
2392         if (!mask[tmp]) {
2393           mask[tmp] = 1;
2394           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2395           else masked1[dcount++] = tmp; /* entry in diag portion */
2396         }
2397       }
2398       rowcount++;
2399     }
2400 
2401     dlens[i]  = dcount;  /* d_nzz[i] */
2402     odlens[i] = odcount; /* o_nzz[i] */
2403 
2404     /* zero out the mask elements we set */
2405     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2406     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2407   }
  if (!sizesset) {
2409     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2410   }
2411   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2412   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2413 
2414   if (!rank) {
2415     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2416     /* read in my part of the matrix numerical values  */
2417     nz = procsnz[0];
2418     vals = buf;
2419     mycols = ibuf;
2420     if (size == 1)  nz -= extra_rows;
2421     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2422     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2423 
2424     /* insert into matrix */
2425     jj      = rstart*bs;
2426     for (i=0; i<m; i++) {
2427       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2428       mycols += locrowlens[i];
2429       vals   += locrowlens[i];
2430       jj++;
2431     }
2432 
2433     /* read in other processors (except the last one) and ship out */
2434     for (i=1; i<size-1; i++) {
2435       nz   = procsnz[i];
2436       vals = buf;
2437       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2438       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2439     }
2440     /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not stored on the disk */
2443       vals = buf;
2444       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2445       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2446       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2447     }
2448     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2449 
2450   } else {
2451     /* receive numeric values */
2452     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2453 
    /* receive message of values */
2455     vals   = buf;
2456     mycols = ibuf;
2457     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2458     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2460 
2461     /* insert into matrix */
2462     jj      = rstart*bs;
2463     for (i=0; i<m; i++) {
2464       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2465       mycols += locrowlens[i];
2466       vals   += locrowlens[i];
2467       jj++;
2468     }
2469   }
2470 
2471   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2472   ierr = PetscFree(buf);CHKERRQ(ierr);
2473   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2474   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2475   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2476   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2477   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2478   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2479   PetscFunctionReturn(0);
2480 }
2481 
2482 #undef __FUNCT__
2483 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
/*XXXXX@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.

   Not Collective on Mat, each process can have a different hash factor

   Input Parameters:
+  mat  - the matrix
-  fact - factor

   Notes:
   This can also be set by the command line option: -mat_use_hash_table fact

   Level: advanced

.keywords: matrix, hashtable, factor, HT

.seealso: MatSetOption()
@XXXXX*/
2502 
2503 
2504 #undef __FUNCT__
2505 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2506 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2507 {
2508   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2509   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2510   PetscReal      atmp;
2511   PetscReal      *work,*svalues,*rvalues;
2512   PetscErrorCode ierr;
2513   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2514   PetscMPIInt    rank,size;
2515   PetscInt       *rowners_bs,dest,count,source;
2516   PetscScalar    *va;
2517   MatScalar      *ba;
2518   MPI_Status     stat;
2519 
2520   PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Returning the index array is not supported for MPISBAIJ; send email to petsc-maint@mcs.anl.gov");
2522   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2523   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2524 
2525   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2526   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2527 
2528   bs   = A->rmap->bs;
2529   mbs  = a->mbs;
2530   Mbs  = a->Mbs;
2531   ba   = b->a;
2532   bi   = b->i;
2533   bj   = b->j;
2534 
2535   /* find ownerships */
2536   rowners_bs = A->rmap->range;
2537 
2538   /* each proc creates an array to be distributed */
2539   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2540   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2541 
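  /* only the upper triangle is stored, so each entry of B at global position (row,col)
     also represents (col,row), whose row is owned by a higher-ranked process: accumulate
     those contributions in work[] (indexed by global column) and ship each slice of
     work[] to its owning process below */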
2542   /* row_max for B */
2543   if (rank != size-1){
2544     for (i=0; i<mbs; i++) {
2545       ncols = bi[1] - bi[0]; bi++;
2546       brow  = bs*i;
2547       for (j=0; j<ncols; j++){
2548         bcol = bs*(*bj);
2549         for (kcol=0; kcol<bs; kcol++){
2550           col = bcol + kcol;                 /* local col index */
2551           col += rowners_bs[rank+1];      /* global col index */
2552           for (krow=0; krow<bs; krow++){
2553             atmp = PetscAbsScalar(*ba); ba++;
2554             row = brow + krow;    /* local row index */
2555             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2556             if (work[col] < atmp) work[col] = atmp;
2557           }
2558         }
2559         bj++;
2560       }
2561     }
2562 
2563     /* send values to its owners */
2564     for (dest=rank+1; dest<size; dest++){
2565       svalues = work + rowners_bs[dest];
2566       count   = rowners_bs[dest+1]-rowners_bs[dest];
2567       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2568     }
2569   }
2570 
2571   /* receive values */
2572   if (rank){
2573     rvalues = work;
2574     count   = rowners_bs[rank+1]-rowners_bs[rank];
2575     for (source=0; source<rank; source++){
2576       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2577       /* process values */
2578       for (i=0; i<count; i++){
2579         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2580       }
2581     }
2582   }
2583 
2584   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2585   ierr = PetscFree(work);CHKERRQ(ierr);
2586   PetscFunctionReturn(0);
2587 }
2588 
2589 #undef __FUNCT__
2590 #define __FUNCT__ "MatSOR_MPISBAIJ"
2591 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2592 {
2593   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2594   PetscErrorCode    ierr;
2595   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2596   PetscScalar       *x,*ptr,*from;
2597   Vec               bb1;
2598   const PetscScalar *b;
2599 
2600   PetscFunctionBegin;
2601   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2602   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2603 
2604   if (flag == SOR_APPLY_UPPER) {
2605     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2606     PetscFunctionReturn(0);
2607   }
2608 
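  /* Local symmetric sweeps: in effect, each iteration assembles, with one forward scatter
     of the combined slvec0/slvec1 work vectors, bb1 = bb minus the off-diagonal
     contributions of both stored triangles (B*x and B^T*x), then runs the sequential SOR
     of the diagonal block A on bb1. */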
2609   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
    if (flag & SOR_ZERO_INITIAL_GUESS) {
2611       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2612       its--;
2613     }
2614 
2615     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2616     while (its--){
2617 
2618       /* lower triangular part: slvec0b = - B^T*xx */
2619       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2620 
2621       /* copy xx into slvec0a */
2622       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2623       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2624       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2625       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2626 
2627       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2628 
2629       /* copy bb into slvec1a */
2630       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2631       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2632       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2633       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2634 
2635       /* set slvec1b = 0 */
2636       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2637 
2638       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2639       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2640       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2641       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2642 
2643       /* upper triangular part: bb1 = bb1 - B*x */
2644       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2645 
2646       /* local diagonal sweep */
2647       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2648     }
2649     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2650   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2651     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2652   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2653     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2654   } else if (flag & SOR_EISENSTAT) {
2655     Vec               xx1;
2656     PetscTruth        hasop;
2657     const PetscScalar *diag;
2658     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2659     PetscInt          i,n;
2660 
2661     if (!mat->xx1) {
2662       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2663       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2664     }
2665     xx1 = mat->xx1;
2666     bb1 = mat->bb1;
2667 
2668     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2669 
2670     if (!mat->diag) {
2671       /* note: the diagonal is cached here, so it is stale if the same matrix is reused with new numerical values */
2672       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2673       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2674     }
2675     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2676 
2677     if (hasop) {
2678       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr); /* put D*xx in slvec1a, matching the hand-coded branch below */
2679       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2680     } else {
2681       /*
2682           The following two calls are replaced by the hand-written loops below, which may be a bit faster with a good compiler
2683       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2684       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2685       */
2686       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2687       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2688       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2689       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2690       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2691       if (omega == 1.0) {
2692         for (i=0; i<n; i++) {
2693           sl[i] = b[i] - diag[i]*x[i];
2694         }
2695         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2696       } else {
2697         for (i=0; i<n; i++) {
2698           sl[i] = b[i] + scale*diag[i]*x[i];
2699         }
2700         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2701       }
2702       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2703       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2704       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2705       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2706     }
2707 
2708     /* multiply off-diagonal portion of matrix */
2709     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2710     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2711     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2712     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2713     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* x holds PetscScalar, not MatScalar */
2714     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2715     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2716     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2717     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2719 
2720     /* local sweep */
2721     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2722     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2723   } else {
2724     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2725   }
2726   PetscFunctionReturn(0);
2727 }
2728 
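/*
   A minimal usage sketch (illustration only, excluded from compilation): one
   zero-initial-guess local symmetric sweep on an assembled MATMPISBAIJ matrix
   A with bs = 1, applied through the public MatSOR() interface; b and x are
   assumed to be conforming parallel vectors.
*/
#if 0
ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,1,1,x);CHKERRQ(ierr);
#endif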
2729 #undef __FUNCT__
2730 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2731 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2732 {
2733   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2734   PetscErrorCode ierr;
2735   Vec            lvec1,bb1;
2736 
2737   PetscFunctionBegin;
2738   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2739   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2740 
2741   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2742     if (flag & SOR_ZERO_INITIAL_GUESS) {
2743       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2744       its--;
2745     }
2746 
2747     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2748     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
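    /* This variant uses the conventional (BAIJ-style) Mvctx scatter instead of
       the merged sMvctx machinery: the forward scatter of xx into lvec is
       overlapped with the local B^T*xx product, and a reverse scatter
       accumulates the lower triangular contributions back into bb1. */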
2749     while (its--) {
2750       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2751 
2752       /* lower triangular part: bb1 = bb - B^T*xx */
2753       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2754       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2755 
2756       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2757       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2758       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2759 
2760       /* upper triangular part: bb1 = bb1 - B*x */
2761       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2762       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2763 
2764       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2765 
2766       /* diagonal sweep */
2767       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2768     }
2769     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2770     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2771   } else {
2772     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2773   }
2774   PetscFunctionReturn(0);
2775 }
2776 
2777 #undef __FUNCT__
2778 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2779 /*@
2780      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
2781          rows in standard CSR format.
2782 
2783    Collective on MPI_Comm
2784 
2785    Input Parameters:
2786 +  comm - MPI communicator
2787 .  bs - the block size, only a block size of 1 is supported
2788 .  m - number of local rows (Cannot be PETSC_DECIDE)
2789 .  n - number of local columns; this should equal the local size used in creating the
2790        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2791        calculated if N is given). For square matrices n is almost always m.
2792 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2793 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2794 .  i - row offsets: i[r] is the start of local row r in j (length m+1)
2795 .  j - column indices
2796 -  a - matrix values
2797 
2798    Output Parameter:
2799 .  mat - the matrix
2800 
2801    Level: intermediate
2802 
2803    Notes:
2804        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2805      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2806      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2807 
2808        The i and j indices are 0 based, and the entries of i are offsets into the local j array.
2809 
2810 .keywords: matrix, sbaij, compressed row, sparse, parallel
2811 
2812 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2813           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2814 @*/
2815 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2816 {
2817   PetscErrorCode ierr;
2818 
2819 
2820   PetscFunctionBegin;
2821   if (i[0]) {
2822     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2823   }
2824   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2825   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2826   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2827   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2828   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2829   PetscFunctionReturn(0);
2830 }
2831 
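/*
   A hypothetical single-process example (illustration only, excluded from
   compilation): the 3x3 symmetric tridiagonal matrix

        [  2 -1  0 ]
        [ -1  2 -1 ]
        [  0 -1  2 ]

   described by the CSR arrays of its upper triangle, assuming each local row
   lists only columns >= row.
*/
#if 0
PetscInt    ii[] = {0,2,4,5};                /* start of each local row in jj   */
PetscInt    jj[] = {0,1,1,2,2};              /* upper triangular column indices */
PetscScalar vv[] = {2.0,-1.0,2.0,-1.0,2.0};  /* matching values                 */
Mat         A;
ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,vv,&A);CHKERRQ(ierr);
#endif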
2832 
2833 #undef __FUNCT__
2834 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2835 /*@C
2836    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2837    (the symmetric block compressed row format, which stores only the upper triangular part).
2838 
2839    Collective on MPI_Comm
2840 
2841    Input Parameters:
2842 +  A - the matrix
2843 .  bs - the block size
2844 .  i - the indices into j for the start of each local row (starts with zero)
2845 .  j - the column indices for each local row (starts with zero); these must be sorted for each row
2846 -  v - optional values in the matrix
2847 
2848    Level: developer
2849 
2850 .keywords: matrix, sbaij, compressed row, sparse, parallel
2851 
2852 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2853 @*/
2854 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2855 {
2856   PetscErrorCode ierr,(*f)(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]);
2857 
2858   PetscFunctionBegin;
2859   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",(void (**)(void))&f);CHKERRQ(ierr);
2860   if (f) {
2861     ierr = (*f)(B,bs,i,j,v);CHKERRQ(ierr);
2862   }
2863   PetscFunctionReturn(0);
2864 }
2865 
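/*
   A sketch of the intended calling sequence (illustration only, excluded from
   compilation): create the matrix object first, then hand it the CSR
   structure; comm, m, n, M, N, bs and the hypothetical ii/jj/vv arrays are
   assumed to be set up by the caller as in the example above.
*/
#if 0
ierr = MatCreate(comm,&B);CHKERRQ(ierr);
ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
ierr = MatMPISBAIJSetPreallocationCSR(B,bs,ii,jj,vv);CHKERRQ(ierr);
#endif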
2866 
2867