xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision f73d5cc4fab9985badaa1a29b152f456bf59ad34)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if(!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If NumCol = 1 then a copy is not required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++  = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend){
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         }
313         else if (in[j] < 0) continue;
314 #if defined(PETSC_USE_DEBUG)
315         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
316 #endif
317         else {
318           if (mat->was_assembled) {
319             if (!baij->colmap) {
320               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
321             }
322 
323 #if defined(PETSC_USE_DEBUG)
324 #if defined (PETSC_USE_CTABLE)
325             { PetscInt data;
326               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
327               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328             }
329 #else
330             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
331 #endif
332 #endif
333 #if defined (PETSC_USE_CTABLE)
334 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
335             col  = (col - 1)/bs;
336 #else
337             col = (baij->colmap[in[j]] - 1)/bs;
338 #endif
339             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
340               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
341               col =  in[j];
342             }
343           }
344           else col = in[j];
345           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
346         }
347       }
348     } else {
349       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
350       if (!baij->donotstash) {
351         if (roworiented) {
352           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         } else {
354           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         }
356       }
357     }
358   }
359   PetscFunctionReturn(0);
360 }
361 
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetValues_MPISBAIJ"
364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
365 {
366   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
367   PetscErrorCode ierr;
368   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
369   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
370 
371   PetscFunctionBegin;
372   for (i=0; i<m; i++) {
373     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
374     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
375     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
376       row = idxm[i] - bsrstart;
377       for (j=0; j<n; j++) {
378         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
379         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
380         if (idxn[j] >= bscstart && idxn[j] < bscend){
381           col = idxn[j] - bscstart;
382           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
383         } else {
384           if (!baij->colmap) {
385             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
386           }
387 #if defined (PETSC_USE_CTABLE)
388           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
389           data --;
390 #else
391           data = baij->colmap[idxn[j]/bs]-1;
392 #endif
393           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
394           else {
395             col  = data + idxn[j]%bs;
396             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
397           }
398         }
399       }
400     } else {
401       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
402     }
403   }
404  PetscFunctionReturn(0);
405 }
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
426       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++){
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++){
446             for (row=0; row<bs; row++){
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* non-diagonal block */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++){
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++){
463             for (row=0; row<bs; row++){
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else {
478       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
479     }
480   }
481   PetscFunctionReturn(0);
482 }
483 
484 #undef __FUNCT__
485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
487 {
488   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
489   PetscErrorCode ierr;
490   PetscInt       nstash,reallocs;
491   InsertMode     addv;
492 
493   PetscFunctionBegin;
494   if (baij->donotstash || mat->nooffprocentries) {
495     PetscFunctionReturn(0);
496   }
497 
498   /* make sure all processors are either in INSERTMODE or ADDMODE */
499   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
500   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
501   mat->insertmode = addv; /* in case this processor had no cache */
502 
503   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
504   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
512 #undef __FUNCT__
513 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
514 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
515 {
516   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
517   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
518   PetscErrorCode ierr;
519   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
520   PetscInt       *row,*col;
521   PetscBool      other_disassembled;
522   PetscMPIInt    n;
523   PetscBool      r1,r2,r3;
524   MatScalar      *val;
525   InsertMode     addv = mat->insertmode;
526 
527   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
528   PetscFunctionBegin;
529 
530   if (!baij->donotstash &&  !mat->nooffprocentries) {
531     while (1) {
532       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
533       if (!flg) break;
534 
535       for (i=0; i<n;) {
536         /* Now identify the consecutive vals belonging to the same row */
537         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
538         if (j < n) ncols = j-i;
539         else       ncols = n-i;
540         /* Now assemble all these values with a single function call */
541         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
542         i = j;
543       }
544     }
545     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
546     /* Now process the block-stash. Since the values are stashed column-oriented,
547        set the roworiented flag to column oriented, and after MatSetValues()
548        restore the original flags */
549     r1 = baij->roworiented;
550     r2 = a->roworiented;
551     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
552     baij->roworiented = PETSC_FALSE;
553     a->roworiented    = PETSC_FALSE;
554     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
555     while (1) {
556       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
557       if (!flg) break;
558 
559       for (i=0; i<n;) {
560         /* Now identify the consecutive vals belonging to the same row */
561         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
562         if (j < n) ncols = j-i;
563         else       ncols = n-i;
564         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
565         i = j;
566       }
567     }
568     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
569     baij->roworiented = r1;
570     a->roworiented    = r2;
571     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
572   }
573 
574   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
576 
577   /* determine if any processor has disassembled, if so we must
578      also disassemble ourselfs, in order that we may reassemble. */
579   /*
580      if nonzero structure of submatrix B cannot change then we know that
581      no processor disassembled thus we can skip this stuff
582   */
583   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
584     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
585     if (mat->was_assembled && !other_disassembled) {
586       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
587     }
588   }
589 
590   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
591     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
592   }
593   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
594   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
595   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
596 
597   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
598   baij->rowvalues = 0;
599 
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #undef __FUNCT__
605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
607 {
608   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode    ierr;
610   PetscInt          bs = mat->rmap->bs;
611   PetscMPIInt       size = baij->size,rank = baij->rank;
612   PetscBool         iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
618   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
619   if (iascii) {
620     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
621     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
622       MatInfo info;
623       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
624       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
633       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
634       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_INFO) {
637       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
640       PetscFunctionReturn(0);
641     }
642   }
643 
644   if (isdraw) {
645     PetscDraw  draw;
646     PetscBool  isnull;
647     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
648     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
649   }
650 
651   if (size == 1) {
652     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
653     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
654   } else {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
671     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
676     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++; a += bs;
687         }
688       }
689     }
690     /* copy over the B part */
691     Bloc = (Mat_SeqBAIJ*)baij->B->data;
692     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
693     for (i=0; i<mbs; i++) {
694 
695       rvals[0] = bs*(baij->rstartbs + i);
696       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
697       for (j=ai[i]; j<ai[i+1]; j++) {
698         col = baij->garray[aj[j]]*bs;
699         for (k=0; k<bs; k++) {
700           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
701           col++; a += bs;
702         }
703       }
704     }
705     ierr = PetscFree(rvals);CHKERRQ(ierr);
706     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     /*
709        Everyone has to call to draw the matrix since the graphics waits are
710        synchronized across all processors that share the PetscDraw object
711     */
712     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
713     if (!rank) {
714       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
715           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
716       PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);
717       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
718     }
719     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
720     ierr = MatDestroy(&A);CHKERRQ(ierr);
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPISBAIJ"
727 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
728 {
729   PetscErrorCode ierr;
730   PetscBool      iascii,isdraw,issocket,isbinary;
731 
732   PetscFunctionBegin;
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
735   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
736   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
737   if (iascii || isdraw || issocket || isbinary) {
738     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
739   } else {
740     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatDestroy_MPISBAIJ"
747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748 {
749   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755 #endif
756   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
757   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
760 #if defined (PETSC_USE_CTABLE)
761   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
762 #else
763   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
764 #endif
765   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
773   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
776   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
780 #if defined(PETSC_USE_REAL_MAT_SINGLE)
781   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
782 #endif
783   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
786   ierr = PetscFree(mat->data);CHKERRQ(ierr);
787 
788   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* supperdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar    *x,*from;
840 
841   PetscFunctionBegin;
842   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
843   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844 
845   /* diagonal part */
846   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
847   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
848 
849   /* subdiagonal part */
850   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
851 
852   /* copy x into the vec slvec0 */
853   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
857   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   /* supperdiagonal part */
863   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
869 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870 {
871   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
872   PetscErrorCode ierr;
873   PetscInt       nt;
874 
875   PetscFunctionBegin;
876   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
877   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
878 
879   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
880   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
881 
882   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   /* do diagonal part */
884   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
885   /* do supperdiagonal part */
886   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
888   /* do subdiagonal part */
889   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
890   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892 
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(((PetscObject)A)->comm);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* supperdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929 
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
935 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   /* do diagonal part */
943   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
944   /* do supperdiagonal part */
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
947 
948   /* do subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952 
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock A->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) { max = tmp; }
1011     }
1012     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
1025   cmap  = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart)
1049               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1050             else break;
1051           }
1052           imark = i;
1053         }
1054         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1055         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1056       }
1057     } else {
1058       if (idx) *idx = 0;
1059       if (v)   *v   = 0;
1060     }
1061   }
1062   *nz = nztot;
1063   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1064   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1076   baij->getrowactive = PETSC_FALSE;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1082 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1083 {
1084   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1085   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1086 
1087   PetscFunctionBegin;
1088   aA->getrow_utriangular = PETSC_TRUE;
1089   PetscFunctionReturn(0);
1090 }
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1093 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1097 
1098   PetscFunctionBegin;
1099   aA->getrow_utriangular = PETSC_FALSE;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1105 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1106 {
1107   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1112   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1118 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1119 {
1120   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1125   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1131 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1132 {
1133   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1138   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1144 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1147   Mat            A = a->A,B = a->B;
1148   PetscErrorCode ierr;
1149   PetscReal      isend[5],irecv[5];
1150 
1151   PetscFunctionBegin;
1152   info->block_size     = (PetscReal)matin->rmap->bs;
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   } else if (flag == MAT_GLOBAL_SUM) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else {
1180     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1181   }
1182   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1183   info->fill_ratio_needed = 0;
1184   info->factor_mallocs    = 0;
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1190 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1191 {
1192   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1193   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   switch (op) {
1198   case MAT_NEW_NONZERO_LOCATIONS:
1199   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201   case MAT_KEEP_NONZERO_PATTERN:
1202   case MAT_NEW_NONZERO_LOCATION_ERR:
1203     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = flg;
1208     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1210     break;
1211   case MAT_NEW_DIAGONALS:
1212     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1213     break;
1214   case MAT_IGNORE_OFF_PROC_ENTRIES:
1215     a->donotstash = flg;
1216     break;
1217   case MAT_USE_HASH_TABLE:
1218     a->ht_flag = flg;
1219     break;
1220   case MAT_HERMITIAN:
1221     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1224     break;
1225   case MAT_SPD:
1226     A->spd_set                         = PETSC_TRUE;
1227     A->spd                             = flg;
1228     if (flg) {
1229       A->symmetric                     = PETSC_TRUE;
1230       A->structurally_symmetric        = PETSC_TRUE;
1231       A->symmetric_set                 = PETSC_TRUE;
1232       A->structurally_symmetric_set    = PETSC_TRUE;
1233     }
1234     break;
1235   case MAT_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_STRUCTURALLY_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_SYMMETRY_ETERNAL:
1242     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1243     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1244     break;
1245   case MAT_IGNORE_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_ERROR_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_GETROW_UPPERTRIANGULAR:
1252     aA->getrow_utriangular = flg;
1253     break;
1254   default:
1255     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265   PetscFunctionBegin;
1266   if (MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1274 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1275 {
1276   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1277   Mat            a=baij->A, b=baij->B;
1278   PetscErrorCode ierr;
1279   PetscInt       nv,m,n;
1280   PetscBool      flg;
1281 
1282   PetscFunctionBegin;
1283   if (ll != rr){
1284     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1285     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1286   }
1287   if (!ll) PetscFunctionReturn(0);
1288 
1289   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1290   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1291 
1292   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1293   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1294 
1295   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296 
1297   /* left diagonalscale the off-diagonal part */
1298   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1299 
1300   /* scale the diagonal part */
1301   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1302 
1303   /* right diagonalscale the off-diagonal part */
1304   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1311 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatEqual_MPISBAIJ"
1325 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1326 {
1327   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1328   Mat            a,b,c,d;
1329   PetscBool      flg;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   a = matA->A; b = matA->B;
1334   c = matB->A; d = matB->B;
1335 
1336   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1337   if (flg) {
1338     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1339   }
1340   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCopy_MPISBAIJ"
1346 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1347 {
1348   PetscErrorCode ierr;
1349   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1350   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1351 
1352   PetscFunctionBegin;
1353   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1354   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1355     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1356     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1357     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1358   } else {
1359     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1360     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1367 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1368 {
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
1388     PetscScalar alpha = a;
1389     xa = (Mat_SeqSBAIJ *)xx->A->data;
1390     ya = (Mat_SeqSBAIJ *)yy->A->data;
1391     bnz = PetscBLASIntCast(xa->nz);
1392     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1393     xb = (Mat_SeqBAIJ *)xx->B->data;
1394     yb = (Mat_SeqBAIJ *)yy->B->data;
1395     bnz = PetscBLASIntCast(xb->nz);
1396     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1397   } else {
1398     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1407 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1408 {
1409   PetscErrorCode ierr;
1410   PetscInt       i;
1411   PetscBool      flg;
1412 
1413   PetscFunctionBegin;
1414   for (i=0; i<n; i++) {
1415     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1416     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1417   }
1418   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 
1423 /* -------------------------------------------------------------------*/
1424 static struct _MatOps MatOps_Values = {
1425        MatSetValues_MPISBAIJ,
1426        MatGetRow_MPISBAIJ,
1427        MatRestoreRow_MPISBAIJ,
1428        MatMult_MPISBAIJ,
1429 /* 4*/ MatMultAdd_MPISBAIJ,
1430        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1431        MatMultAdd_MPISBAIJ,
1432        0,
1433        0,
1434        0,
1435 /*10*/ 0,
1436        0,
1437        0,
1438        MatSOR_MPISBAIJ,
1439        MatTranspose_MPISBAIJ,
1440 /*15*/ MatGetInfo_MPISBAIJ,
1441        MatEqual_MPISBAIJ,
1442        MatGetDiagonal_MPISBAIJ,
1443        MatDiagonalScale_MPISBAIJ,
1444        MatNorm_MPISBAIJ,
1445 /*20*/ MatAssemblyBegin_MPISBAIJ,
1446        MatAssemblyEnd_MPISBAIJ,
1447        MatSetOption_MPISBAIJ,
1448        MatZeroEntries_MPISBAIJ,
1449 /*24*/ 0,
1450        0,
1451        0,
1452        0,
1453        0,
1454 /*29*/ MatSetUp_MPISBAIJ,
1455        0,
1456        0,
1457        0,
1458        0,
1459 /*34*/ MatDuplicate_MPISBAIJ,
1460        0,
1461        0,
1462        0,
1463        0,
1464 /*39*/ MatAXPY_MPISBAIJ,
1465        MatGetSubMatrices_MPISBAIJ,
1466        MatIncreaseOverlap_MPISBAIJ,
1467        MatGetValues_MPISBAIJ,
1468        MatCopy_MPISBAIJ,
1469 /*44*/ 0,
1470        MatScale_MPISBAIJ,
1471        0,
1472        0,
1473        0,
1474 /*49*/ 0,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*54*/ 0,
1480        0,
1481        MatSetUnfactored_MPISBAIJ,
1482        0,
1483        MatSetValuesBlocked_MPISBAIJ,
1484 /*59*/ 0,
1485        0,
1486        0,
1487        0,
1488        0,
1489 /*64*/ 0,
1490        0,
1491        0,
1492        0,
1493        0,
1494 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1495        0,
1496        0,
1497        0,
1498        0,
1499 /*74*/ 0,
1500        0,
1501        0,
1502        0,
1503        0,
1504 /*79*/ 0,
1505        0,
1506        0,
1507        0,
1508        MatLoad_MPISBAIJ,
1509 /*84*/ 0,
1510        0,
1511        0,
1512        0,
1513        0,
1514 /*89*/ 0,
1515        0,
1516        0,
1517        0,
1518        0,
1519 /*94*/ 0,
1520        0,
1521        0,
1522        0,
1523        0,
1524 /*99*/ 0,
1525        0,
1526        0,
1527        0,
1528        0,
1529 /*104*/0,
1530        MatRealPart_MPISBAIJ,
1531        MatImaginaryPart_MPISBAIJ,
1532        MatGetRowUpperTriangular_MPISBAIJ,
1533        MatRestoreRowUpperTriangular_MPISBAIJ,
1534 /*109*/0,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*114*/0,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*119*/0,
1545        0,
1546        0,
1547        0
1548 };
1549 
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1554 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1555 {
1556   PetscFunctionBegin;
1557   *a = ((Mat_MPISBAIJ *)A->data)->A;
1558   PetscFunctionReturn(0);
1559 }
1560 EXTERN_C_END
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1565 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1566 {
1567   Mat_MPISBAIJ   *b;
1568   PetscErrorCode ierr;
1569   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1570 
1571   PetscFunctionBegin;
1572   if (bs < 0){
1573     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1574       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1575     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1576     bs   = PetscAbs(bs);
1577   }
1578   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1579   bs = newbs;
1580 
1581   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1582   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1583   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1584   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1585 
1586   B->rmap->bs = B->cmap->bs = bs;
1587   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1588   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1589 
1590   if (d_nnz) {
1591     for (i=0; i<B->rmap->n/bs; i++) {
1592       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1593     }
1594   }
1595   if (o_nnz) {
1596     for (i=0; i<B->rmap->n/bs; i++) {
1597       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1598     }
1599   }
1600 
1601   b   = (Mat_MPISBAIJ*)B->data;
1602   mbs = B->rmap->n/bs;
1603   Mbs = B->rmap->N/bs;
1604   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1605 
1606   B->rmap->bs  = bs;
1607   b->bs2 = bs*bs;
1608   b->mbs = mbs;
1609   b->nbs = mbs;
1610   b->Mbs = Mbs;
1611   b->Nbs = Mbs;
1612 
1613   for (i=0; i<=b->size; i++) {
1614     b->rangebs[i] = B->rmap->range[i]/bs;
1615   }
1616   b->rstartbs = B->rmap->rstart/bs;
1617   b->rendbs   = B->rmap->rend/bs;
1618 
1619   b->cstartbs = B->cmap->rstart/bs;
1620   b->cendbs   = B->cmap->rend/bs;
1621 
1622   if (!B->preallocated) {
1623     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1624     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1625     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1626     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1627     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1628     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1629     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1630     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1631     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1632   }
1633 
1634   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1635   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1636   B->preallocated = PETSC_TRUE;
1637   PetscFunctionReturn(0);
1638 }
1639 EXTERN_C_END
1640 
1641 EXTERN_C_BEGIN
1642 #undef __FUNCT__
1643 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1644 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1645 {
1646   PetscInt       m,rstart,cstart,cend;
1647   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1648   const PetscInt *JJ=0;
1649   PetscScalar    *values=0;
1650   PetscErrorCode ierr;
1651 
1652   PetscFunctionBegin;
1653 
1654   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1655   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1656   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1657   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1658   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1659   m      = B->rmap->n/bs;
1660   rstart = B->rmap->rstart/bs;
1661   cstart = B->cmap->rstart/bs;
1662   cend   = B->cmap->rend/bs;
1663 
1664   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1665   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1666   for (i=0; i<m; i++) {
1667     nz = ii[i+1] - ii[i];
1668     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1669     nz_max = PetscMax(nz_max,nz);
1670     JJ  = jj + ii[i];
1671     for (j=0; j<nz; j++) {
1672       if (*JJ >= cstart) break;
1673       JJ++;
1674     }
1675     d = 0;
1676     for (; j<nz; j++) {
1677       if (*JJ++ >= cend) break;
1678       d++;
1679     }
1680     d_nnz[i] = d;
1681     o_nnz[i] = nz - d;
1682   }
1683   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1684   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1685 
1686   values = (PetscScalar*)V;
1687   if (!values) {
1688     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1689     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1690   }
1691   for (i=0; i<m; i++) {
1692     PetscInt          row    = i + rstart;
1693     PetscInt          ncols  = ii[i+1] - ii[i];
1694     const PetscInt    *icols = jj + ii[i];
1695     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1696     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1697   }
1698 
1699   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1700   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1701   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1702   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1703   PetscFunctionReturn(0);
1704 }
1705 EXTERN_C_END
1706 
1707 EXTERN_C_BEGIN
1708 #if defined(PETSC_HAVE_MUMPS)
1709 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1710 #endif
1711 #if defined(PETSC_HAVE_SPOOLES)
1712 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1713 #endif
1714 #if defined(PETSC_HAVE_PASTIX)
1715 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1716 #endif
1717 EXTERN_C_END
1718 
1719 /*MC
1720    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1721    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1722    the matrix is stored.
1723 
1724   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1725   can call MatSetOption(Mat, MAT_HERMITIAN);
1726 
1727    Options Database Keys:
1728 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1729 
1730   Level: beginner
1731 
1732 .seealso: MatCreateMPISBAIJ
1733 M*/
1734 
1735 EXTERN_C_BEGIN
1736 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*);
1737 EXTERN_C_END
1738 
1739 EXTERN_C_BEGIN
1740 #undef __FUNCT__
1741 #define __FUNCT__ "MatCreate_MPISBAIJ"
1742 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1743 {
1744   Mat_MPISBAIJ   *b;
1745   PetscErrorCode ierr;
1746   PetscBool      flg;
1747 
1748   PetscFunctionBegin;
1749 
1750   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1751   B->data = (void*)b;
1752   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1753 
1754   B->ops->destroy    = MatDestroy_MPISBAIJ;
1755   B->ops->view       = MatView_MPISBAIJ;
1756   B->assembled       = PETSC_FALSE;
1757 
1758   B->insertmode = NOT_SET_VALUES;
1759   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1760   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1761 
1762   /* build local table of row and column ownerships */
1763   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1764 
1765   /* build cache for off array entries formed */
1766   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1767   b->donotstash  = PETSC_FALSE;
1768   b->colmap      = PETSC_NULL;
1769   b->garray      = PETSC_NULL;
1770   b->roworiented = PETSC_TRUE;
1771 
1772   /* stuff used in block assembly */
1773   b->barray       = 0;
1774 
1775   /* stuff used for matrix vector multiply */
1776   b->lvec         = 0;
1777   b->Mvctx        = 0;
1778   b->slvec0       = 0;
1779   b->slvec0b      = 0;
1780   b->slvec1       = 0;
1781   b->slvec1a      = 0;
1782   b->slvec1b      = 0;
1783   b->sMvctx       = 0;
1784 
1785   /* stuff for MatGetRow() */
1786   b->rowindices   = 0;
1787   b->rowvalues    = 0;
1788   b->getrowactive = PETSC_FALSE;
1789 
1790   /* hash table stuff */
1791   b->ht           = 0;
1792   b->hd           = 0;
1793   b->ht_size      = 0;
1794   b->ht_flag      = PETSC_FALSE;
1795   b->ht_fact      = 0;
1796   b->ht_total_ct  = 0;
1797   b->ht_insert_ct = 0;
1798 
1799   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1800   b->ijonly       = PETSC_FALSE;
1801 
1802   b->in_loc       = 0;
1803   b->v_loc        = 0;
1804   b->n_loc        = 0;
1805   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1806     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1807     if (flg) {
1808       PetscReal fact = 1.39;
1809       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1810       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1811       if (fact <= 1.0) fact = 1.39;
1812       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1813       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1814     }
1815   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1816 
1817 #if defined(PETSC_HAVE_PASTIX)
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1819 					   "MatGetFactor_mpisbaij_pastix",
1820 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1821 #endif
1822 #if defined(PETSC_HAVE_MUMPS)
1823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1824                                      "MatGetFactor_sbaij_mumps",
1825                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1826 #endif
1827 #if defined(PETSC_HAVE_SPOOLES)
1828   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1829                                      "MatGetFactor_mpisbaij_spooles",
1830                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1831 #endif
1832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1833                                      "MatStoreValues_MPISBAIJ",
1834                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1836                                      "MatRetrieveValues_MPISBAIJ",
1837                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1839                                      "MatGetDiagonalBlock_MPISBAIJ",
1840                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1842                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1843                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1844   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1845                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1846                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1848                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1849                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1850 
1851   B->symmetric                  = PETSC_TRUE;
1852   B->structurally_symmetric     = PETSC_TRUE;
1853   B->symmetric_set              = PETSC_TRUE;
1854   B->structurally_symmetric_set = PETSC_TRUE;
1855   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1856   PetscFunctionReturn(0);
1857 }
1858 EXTERN_C_END
1859 
1860 /*MC
1861    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1862 
1863    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1864    and MATMPISBAIJ otherwise.
1865 
1866    Options Database Keys:
1867 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1868 
1869   Level: beginner
1870 
1871 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1872 M*/
1873 
1874 #undef __FUNCT__
1875 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1876 /*@C
1877    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1878    the user should preallocate the matrix storage by setting the parameters
1879    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1880    performance can be increased by more than a factor of 50.
1881 
1882    Collective on Mat
1883 
1884    Input Parameters:
1885 +  A - the matrix
1886 .  bs   - size of blockk
1887 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1888            submatrix  (same for all local rows)
1889 .  d_nnz - array containing the number of block nonzeros in the various block rows
1890            in the upper triangular and diagonal part of the in diagonal portion of the local
1891            (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1892            for the diagonal entry and set a value even if it is zero.
1893 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1894            submatrix (same for all local rows).
1895 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1896            off-diagonal portion of the local submatrix that is right of the diagonal
1897            (possibly different for each block row) or PETSC_NULL.
1898 
1899 
1900    Options Database Keys:
1901 .   -mat_no_unroll - uses code that does not unroll the loops in the
1902                      block calculations (much slower)
1903 .   -mat_block_size - size of the blocks to use
1904 
1905    Notes:
1906 
1907    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1908    than it must be used on all processors that share the object for that argument.
1909 
1910    If the *_nnz parameter is given then the *_nz parameter is ignored
1911 
1912    Storage Information:
1913    For a square global matrix we define each processor's diagonal portion
1914    to be its local rows and the corresponding columns (a square submatrix);
1915    each processor's off-diagonal portion encompasses the remainder of the
1916    local matrix (a rectangular submatrix).
1917 
1918    The user can specify preallocated storage for the diagonal part of
1919    the local submatrix with either d_nz or d_nnz (not both).  Set
1920    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1921    memory allocation.  Likewise, specify preallocated storage for the
1922    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1923 
1924    You can call MatGetInfo() to get information on how effective the preallocation was;
1925    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1926    You can also run with the option -info and look for messages with the string
1927    malloc in them to see if additional memory allocation was needed.
1928 
1929    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1930    the figure below we depict these three local rows and all columns (0-11).
1931 
1932 .vb
1933            0 1 2 3 4 5 6 7 8 9 10 11
1934           -------------------
1935    row 3  |  . . . d d d o o o o o o
1936    row 4  |  . . . d d d o o o o o o
1937    row 5  |  . . . d d d o o o o o o
1938           -------------------
1939 .ve
1940 
1941    Thus, any entries in the d locations are stored in the d (diagonal)
1942    submatrix, and any entries in the o locations are stored in the
1943    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1944    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1945 
1946    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1947    plus the diagonal part of the d matrix,
1948    and o_nz should indicate the number of block nonzeros per row in the o matrix
1949 
1950    In general, for PDE problems in which most nonzeros are near the diagonal,
1951    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1952    or you will get TERRIBLE performance; see the users' manual chapter on
1953    matrices.
1954 
1955    Level: intermediate
1956 
1957 .keywords: matrix, block, aij, compressed row, sparse, parallel
1958 
1959 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
1960 @*/
1961 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1962 {
1963   PetscErrorCode ierr;
1964 
1965   PetscFunctionBegin;
1966   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1967   PetscValidType(B,1);
1968   PetscValidLogicalCollectiveInt(B,bs,2);
1969   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1970   PetscFunctionReturn(0);
1971 }
1972 
1973 #undef __FUNCT__
1974 #define __FUNCT__ "MatCreateSBAIJ"
1975 /*@C
1976    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1977    (block compressed row).  For good matrix assembly performance
1978    the user should preallocate the matrix storage by setting the parameters
1979    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1980    performance can be increased by more than a factor of 50.
1981 
1982    Collective on MPI_Comm
1983 
1984    Input Parameters:
1985 +  comm - MPI communicator
1986 .  bs   - size of blockk
1987 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1988            This value should be the same as the local size used in creating the
1989            y vector for the matrix-vector product y = Ax.
1990 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1991            This value should be the same as the local size used in creating the
1992            x vector for the matrix-vector product y = Ax.
1993 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1994 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1995 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1996            submatrix  (same for all local rows)
1997 .  d_nnz - array containing the number of block nonzeros in the various block rows
1998            in the upper triangular portion of the in diagonal portion of the local
1999            (possibly different for each block block row) or PETSC_NULL.
2000            If you plan to factor the matrix you must leave room for the diagonal entry and
2001            set its value even if it is zero.
2002 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2003            submatrix (same for all local rows).
2004 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2005            off-diagonal portion of the local submatrix (possibly different for
2006            each block row) or PETSC_NULL.
2007 
2008    Output Parameter:
2009 .  A - the matrix
2010 
2011    Options Database Keys:
2012 .   -mat_no_unroll - uses code that does not unroll the loops in the
2013                      block calculations (much slower)
2014 .   -mat_block_size - size of the blocks to use
2015 .   -mat_mpi - use the parallel matrix data structures even on one processor
2016                (defaults to using SeqBAIJ format on one processor)
2017 
2018    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2019    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2020    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2021 
2022    Notes:
2023    The number of rows and columns must be divisible by blocksize.
2024    This matrix type does not support complex Hermitian operation.
2025 
2026    The user MUST specify either the local or global matrix dimensions
2027    (possibly both).
2028 
2029    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2030    than it must be used on all processors that share the object for that argument.
2031 
2032    If the *_nnz parameter is given then the *_nz parameter is ignored
2033 
2034    Storage Information:
2035    For a square global matrix we define each processor's diagonal portion
2036    to be its local rows and the corresponding columns (a square submatrix);
2037    each processor's off-diagonal portion encompasses the remainder of the
2038    local matrix (a rectangular submatrix).
2039 
2040    The user can specify preallocated storage for the diagonal part of
2041    the local submatrix with either d_nz or d_nnz (not both).  Set
2042    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2043    memory allocation.  Likewise, specify preallocated storage for the
2044    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2045 
2046    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2047    the figure below we depict these three local rows and all columns (0-11).
2048 
2049 .vb
2050            0 1 2 3 4 5 6 7 8 9 10 11
2051           -------------------
2052    row 3  |  . . . d d d o o o o o o
2053    row 4  |  . . . d d d o o o o o o
2054    row 5  |  . . . d d d o o o o o o
2055           -------------------
2056 .ve
2057 
2058    Thus, any entries in the d locations are stored in the d (diagonal)
2059    submatrix, and any entries in the o locations are stored in the
2060    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2061    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2062 
2063    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2064    plus the diagonal part of the d matrix,
2065    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2066    In general, for PDE problems in which most nonzeros are near the diagonal,
2067    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2068    or you will get TERRIBLE performance; see the users' manual chapter on
2069    matrices.
2070 
2071    Level: intermediate
2072 
2073 .keywords: matrix, block, aij, compressed row, sparse, parallel
2074 
2075 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2076 @*/
2077 
2078 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2079 {
2080   PetscErrorCode ierr;
2081   PetscMPIInt    size;
2082 
2083   PetscFunctionBegin;
2084   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2085   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2086   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2087   if (size > 1) {
2088     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2089     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2090   } else {
2091     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2092     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2093   }
2094   PetscFunctionReturn(0);
2095 }
2096 
2097 
2098 #undef __FUNCT__
2099 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2100 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2101 {
2102   Mat            mat;
2103   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2104   PetscErrorCode ierr;
2105   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2106   PetscScalar    *array;
2107 
2108   PetscFunctionBegin;
2109   *newmat       = 0;
2110   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2111   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2112   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2113   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2114   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2115   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2116 
2117   mat->factortype   = matin->factortype;
2118   mat->preallocated = PETSC_TRUE;
2119   mat->assembled    = PETSC_TRUE;
2120   mat->insertmode   = NOT_SET_VALUES;
2121 
2122   a = (Mat_MPISBAIJ*)mat->data;
2123   a->bs2   = oldmat->bs2;
2124   a->mbs   = oldmat->mbs;
2125   a->nbs   = oldmat->nbs;
2126   a->Mbs   = oldmat->Mbs;
2127   a->Nbs   = oldmat->Nbs;
2128 
2129 
2130   a->size         = oldmat->size;
2131   a->rank         = oldmat->rank;
2132   a->donotstash   = oldmat->donotstash;
2133   a->roworiented  = oldmat->roworiented;
2134   a->rowindices   = 0;
2135   a->rowvalues    = 0;
2136   a->getrowactive = PETSC_FALSE;
2137   a->barray       = 0;
2138   a->rstartbs    = oldmat->rstartbs;
2139   a->rendbs      = oldmat->rendbs;
2140   a->cstartbs    = oldmat->cstartbs;
2141   a->cendbs      = oldmat->cendbs;
2142 
2143   /* hash table stuff */
2144   a->ht           = 0;
2145   a->hd           = 0;
2146   a->ht_size      = 0;
2147   a->ht_flag      = oldmat->ht_flag;
2148   a->ht_fact      = oldmat->ht_fact;
2149   a->ht_total_ct  = 0;
2150   a->ht_insert_ct = 0;
2151 
2152   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2153   if (oldmat->colmap) {
2154 #if defined (PETSC_USE_CTABLE)
2155     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2156 #else
2157     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2158     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2159     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2160 #endif
2161   } else a->colmap = 0;
2162 
2163   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2164     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2165     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2166     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2167   } else a->garray = 0;
2168 
2169   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2170   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2171   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2172   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2173   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2174 
2175   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2177   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2178   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2179 
2180   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2181   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2182   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2183   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2184   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2185   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2186   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2187   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2188   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2189   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2190   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2191   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2192   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2193 
2194   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2195   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2196   a->sMvctx = oldmat->sMvctx;
2197   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2198 
2199   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2200   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2201   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2202   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2203   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2204   *newmat = mat;
2205   PetscFunctionReturn(0);
2206 }
2207 
2208 #undef __FUNCT__
2209 #define __FUNCT__ "MatLoad_MPISBAIJ"
2210 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2211 {
2212   PetscErrorCode ierr;
2213   PetscInt       i,nz,j,rstart,rend;
2214   PetscScalar    *vals,*buf;
2215   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2216   MPI_Status     status;
2217   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2218   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2219   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2220   PetscInt       bs=1,Mbs,mbs,extra_rows;
2221   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2222   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2223   int            fd;
2224 
2225   PetscFunctionBegin;
2226   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2227     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2228   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2229 
2230   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2231   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2232   if (!rank) {
2233     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2234     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2235     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2236     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2237   }
2238 
2239   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2240 
2241   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2242   M = header[1]; N = header[2];
2243 
2244   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2245   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2246   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2247 
2248   /* If global sizes are set, check if they are consistent with that given in the file */
2249   if (sizesset) {
2250     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2251   }
2252   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2253   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2254 
2255   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2256 
2257   /*
2258      This code adds extra rows to make sure the number of rows is
2259      divisible by the blocksize
2260   */
2261   Mbs        = M/bs;
2262   extra_rows = bs - M + bs*(Mbs);
2263   if (extra_rows == bs) extra_rows = 0;
2264   else                  Mbs++;
2265   if (extra_rows &&!rank) {
2266     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2267   }
2268 
2269   /* determine ownership of all rows */
2270   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2271     mbs        = Mbs/size + ((Mbs % size) > rank);
2272     m          = mbs*bs;
2273   } else { /* User Set */
2274     m          = newmat->rmap->n;
2275     mbs        = m/bs;
2276   }
2277   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2278   mmbs       = PetscMPIIntCast(mbs);
2279   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2280   rowners[0] = 0;
2281   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2282   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2283   rstart = rowners[rank];
2284   rend   = rowners[rank+1];
2285 
2286   /* distribute row lengths to all processors */
2287   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2288   if (!rank) {
2289     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2290     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2291     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2292     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2293     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2294     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2295     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2296   } else {
2297     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2298   }
2299 
2300   if (!rank) {   /* procs[0] */
2301     /* calculate the number of nonzeros on each processor */
2302     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2303     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2304     for (i=0; i<size; i++) {
2305       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2306         procsnz[i] += rowlengths[j];
2307       }
2308     }
2309     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2310 
2311     /* determine max buffer needed and allocate it */
2312     maxnz = 0;
2313     for (i=0; i<size; i++) {
2314       maxnz = PetscMax(maxnz,procsnz[i]);
2315     }
2316     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2317 
2318     /* read in my part of the matrix column indices  */
2319     nz     = procsnz[0];
2320     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2321     mycols = ibuf;
2322     if (size == 1)  nz -= extra_rows;
2323     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2324     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2325 
2326     /* read in every ones (except the last) and ship off */
2327     for (i=1; i<size-1; i++) {
2328       nz   = procsnz[i];
2329       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2330       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2331     }
2332     /* read in the stuff for the last proc */
2333     if (size != 1) {
2334       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2335       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2336       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2337       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2338     }
2339     ierr = PetscFree(cols);CHKERRQ(ierr);
2340   } else {  /* procs[i], i>0 */
2341     /* determine buffer space needed for message */
2342     nz = 0;
2343     for (i=0; i<m; i++) {
2344       nz += locrowlens[i];
2345     }
2346     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2347     mycols = ibuf;
2348     /* receive message of column indices*/
2349     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2350     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2351     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2352   }
2353 
2354   /* loop over local rows, determining number of off diagonal entries */
2355   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2356   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2357   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2358   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2359   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2360   rowcount = 0;
2361   nzcount  = 0;
2362   for (i=0; i<mbs; i++) {
2363     dcount  = 0;
2364     odcount = 0;
2365     for (j=0; j<bs; j++) {
2366       kmax = locrowlens[rowcount];
2367       for (k=0; k<kmax; k++) {
2368         tmp = mycols[nzcount++]/bs; /* block col. index */
2369         if (!mask[tmp]) {
2370           mask[tmp] = 1;
2371           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2372           else masked1[dcount++] = tmp; /* entry in diag portion */
2373         }
2374       }
2375       rowcount++;
2376     }
2377 
2378     dlens[i]  = dcount;  /* d_nzz[i] */
2379     odlens[i] = odcount; /* o_nzz[i] */
2380 
2381     /* zero out the mask elements we set */
2382     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2383     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2384   }
2385     if (!sizesset) {
2386     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2387   }
2388   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2389   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2390 
2391   if (!rank) {
2392     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2393     /* read in my part of the matrix numerical values  */
2394     nz = procsnz[0];
2395     vals = buf;
2396     mycols = ibuf;
2397     if (size == 1)  nz -= extra_rows;
2398     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2399     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2400 
2401     /* insert into matrix */
2402     jj      = rstart*bs;
2403     for (i=0; i<m; i++) {
2404       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2405       mycols += locrowlens[i];
2406       vals   += locrowlens[i];
2407       jj++;
2408     }
2409 
2410     /* read in other processors (except the last one) and ship out */
2411     for (i=1; i<size-1; i++) {
2412       nz   = procsnz[i];
2413       vals = buf;
2414       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2415       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2416     }
2417     /* the last proc */
2418     if (size != 1){
2419       nz   = procsnz[i] - extra_rows;
2420       vals = buf;
2421       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2422       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2423       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2424     }
2425     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2426 
2427   } else {
2428     /* receive numeric values */
2429     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2430 
2431     /* receive message of values*/
2432     vals   = buf;
2433     mycols = ibuf;
2434     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2435     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2436     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2437 
2438     /* insert into matrix */
2439     jj      = rstart*bs;
2440     for (i=0; i<m; i++) {
2441       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2442       mycols += locrowlens[i];
2443       vals   += locrowlens[i];
2444       jj++;
2445     }
2446   }
2447 
2448   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2449   ierr = PetscFree(buf);CHKERRQ(ierr);
2450   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2451   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2452   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2453   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2454   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2455   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2461 /*XXXXX@
2462    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2463 
2464    Input Parameters:
2465 .  mat  - the matrix
2466 .  fact - factor
2467 
2468    Not Collective on Mat, each process can have a different hash factor
2469 
2470    Level: advanced
2471 
2472   Notes:
2473    This can also be set by the command line option: -mat_use_hash_table fact
2474 
2475 .keywords: matrix, hashtable, factor, HT
2476 
2477 .seealso: MatSetOption()
2478 @XXXXX*/
2479 
2480 
2481 #undef __FUNCT__
2482 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2483 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2484 {
2485   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2486   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2487   PetscReal      atmp;
2488   PetscReal      *work,*svalues,*rvalues;
2489   PetscErrorCode ierr;
2490   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2491   PetscMPIInt    rank,size;
2492   PetscInt       *rowners_bs,dest,count,source;
2493   PetscScalar    *va;
2494   MatScalar      *ba;
2495   MPI_Status     stat;
2496 
2497   PetscFunctionBegin;
2498   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2499   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2500   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2501 
2502   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2503   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2504 
2505   bs   = A->rmap->bs;
2506   mbs  = a->mbs;
2507   Mbs  = a->Mbs;
2508   ba   = b->a;
2509   bi   = b->i;
2510   bj   = b->j;
2511 
2512   /* find ownerships */
2513   rowners_bs = A->rmap->range;
2514 
2515   /* each proc creates an array to be distributed */
2516   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2517   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2518 
2519   /* row_max for B */
2520   if (rank != size-1){
2521     for (i=0; i<mbs; i++) {
2522       ncols = bi[1] - bi[0]; bi++;
2523       brow  = bs*i;
2524       for (j=0; j<ncols; j++){
2525         bcol = bs*(*bj);
2526         for (kcol=0; kcol<bs; kcol++){
2527           col = bcol + kcol;                 /* local col index */
2528           col += rowners_bs[rank+1];      /* global col index */
2529           for (krow=0; krow<bs; krow++){
2530             atmp = PetscAbsScalar(*ba); ba++;
2531             row = brow + krow;    /* local row index */
2532             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2533             if (work[col] < atmp) work[col] = atmp;
2534           }
2535         }
2536         bj++;
2537       }
2538     }
2539 
2540     /* send values to its owners */
2541     for (dest=rank+1; dest<size; dest++){
2542       svalues = work + rowners_bs[dest];
2543       count   = rowners_bs[dest+1]-rowners_bs[dest];
2544       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2545     }
2546   }
2547 
2548   /* receive values */
2549   if (rank){
2550     rvalues = work;
2551     count   = rowners_bs[rank+1]-rowners_bs[rank];
2552     for (source=0; source<rank; source++){
2553       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2554       /* process values */
2555       for (i=0; i<count; i++){
2556         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2557       }
2558     }
2559   }
2560 
2561   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2562   ierr = PetscFree(work);CHKERRQ(ierr);
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatSOR_MPISBAIJ"
2568 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2569 {
2570   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2571   PetscErrorCode    ierr;
2572   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2573   PetscScalar       *x,*ptr,*from;
2574   Vec               bb1;
2575   const PetscScalar *b;
2576 
2577   PetscFunctionBegin;
2578   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2579   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2580 
2581   if (flag == SOR_APPLY_UPPER) {
2582     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2583     PetscFunctionReturn(0);
2584   }
2585 
2586   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2587     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2588       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2589       its--;
2590     }
2591 
2592     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2593     while (its--){
2594 
2595       /* lower triangular part: slvec0b = - B^T*xx */
2596       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2597 
2598       /* copy xx into slvec0a */
2599       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2600       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2601       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2602       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2603 
2604       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2605 
2606       /* copy bb into slvec1a */
2607       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2608       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2609       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2610       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2611 
2612       /* set slvec1b = 0 */
2613       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2614 
2615       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2616       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2617       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2618       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2619 
2620       /* upper triangular part: bb1 = bb1 - B*x */
2621       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2622 
2623       /* local diagonal sweep */
2624       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2625     }
2626     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2627   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2628     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2629   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2630     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2631   } else if (flag & SOR_EISENSTAT) {
2632     Vec               xx1;
2633     PetscBool         hasop;
2634     const PetscScalar *diag;
2635     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2636     PetscInt          i,n;
2637 
2638     if (!mat->xx1) {
2639       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2640       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2641     }
2642     xx1 = mat->xx1;
2643     bb1 = mat->bb1;
2644 
2645     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2646 
2647     if (!mat->diag) {
2648       /* this is wrong for same matrix with new nonzero values */
2649       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2650       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2651     }
2652     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2653 
2654     if (hasop) {
2655       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2656       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2657     } else {
2658       /*
2659           These two lines are replaced by code that may be a bit faster for a good compiler
2660       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2661       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2662       */
2663       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2664       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2665       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2666       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2667       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2668       if (omega == 1.0) {
2669 	for (i=0; i<n; i++) {
2670 	  sl[i] = b[i] - diag[i]*x[i];
2671 	}
2672         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2673       } else {
2674 	for (i=0; i<n; i++) {
2675 	  sl[i] = b[i] + scale*diag[i]*x[i];
2676 	}
2677         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2678       }
2679       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2680       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2681       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2682       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2683     }
2684 
2685     /* multiply off-diagonal portion of matrix */
2686     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2687     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2688     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2689     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2690     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2691     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2692     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2693     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2694     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2695     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2696 
2697     /* local sweep */
2698     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2699     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2700   } else {
2701     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2702   }
2703   PetscFunctionReturn(0);
2704 }
2705 
2706 #undef __FUNCT__
2707 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2708 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2709 {
2710   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2711   PetscErrorCode ierr;
2712   Vec            lvec1,bb1;
2713 
2714   PetscFunctionBegin;
2715   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2716   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2717 
2718   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2719     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2720       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2721       its--;
2722     }
2723 
2724     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2725     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2726     while (its--){
2727       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2728 
2729       /* lower diagonal part: bb1 = bb - B^T*xx */
2730       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2731       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2732 
2733       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2734       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2735       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2736 
2737       /* upper diagonal part: bb1 = bb1 - B*x */
2738       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2739       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2740 
2741       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2742 
2743       /* diagonal sweep */
2744       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2745     }
2746     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2747     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2748   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2749   PetscFunctionReturn(0);
2750 }
2751 
2752 #undef __FUNCT__
2753 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2754 /*@
2755      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2756          CSR format the local rows.
2757 
2758    Collective on MPI_Comm
2759 
2760    Input Parameters:
2761 +  comm - MPI communicator
2762 .  bs - the block size, only a block size of 1 is supported
2763 .  m - number of local rows (Cannot be PETSC_DECIDE)
2764 .  n - This value should be the same as the local size used in creating the
2765        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2766        calculated if N is given) For square matrices n is almost always m.
2767 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2768 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2769 .   i - row indices
2770 .   j - column indices
2771 -   a - matrix values
2772 
2773    Output Parameter:
2774 .   mat - the matrix
2775 
2776    Level: intermediate
2777 
2778    Notes:
2779        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2780      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2781      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2782 
2783        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2784 
2785 .keywords: matrix, aij, compressed row, sparse, parallel
2786 
2787 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2788           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2789 @*/
2790 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2791 {
2792   PetscErrorCode ierr;
2793 
2794 
2795  PetscFunctionBegin;
2796   if (i[0]) {
2797     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2798   }
2799   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2800   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2801   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2802   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2803   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2804   PetscFunctionReturn(0);
2805 }
2806 
2807 
2808 #undef __FUNCT__
2809 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2810 /*@C
2811    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2812    (the default parallel PETSc format).
2813 
2814    Collective on MPI_Comm
2815 
2816    Input Parameters:
2817 +  A - the matrix
2818 .  bs - the block size
2819 .  i - the indices into j for the start of each local row (starts with zero)
2820 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2821 -  v - optional values in the matrix
2822 
2823    Level: developer
2824 
2825 .keywords: matrix, aij, compressed row, sparse, parallel
2826 
2827 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2828 @*/
2829 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2830 {
2831   PetscErrorCode ierr;
2832 
2833   PetscFunctionBegin;
2834   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2835   PetscFunctionReturn(0);
2836 }
2837 
2838 
2839