xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a6dfd86ebdf75fa024070af26d51b62fd16650a3)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
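
/*
   Both macros above locate a block column with the same hybrid search: a
   binary search first narrows the window to at most three candidates, then a
   short linear scan finishes.  A minimal standalone sketch of the pattern
   (hypothetical helper, not used in this file):

     static PetscInt FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {             <- binary phase: halve the window
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {         <- linear phase: at most 3 checks
         if (rp[i] > bcol) break;         <- bcol absent; i is the insertion point
         if (rp[i] == bcol) return i;     <- existing block found
       }
       return -(i+1);                     <- encode "insert before i" as negative
     }
*/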
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
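/*
   Caller-side usage sketch (assumed code, not part of this file): supply only
   the upper triangle, or ask the matrix to silently drop lower-triangular
   input.  EntryValue() is a hypothetical routine computing a(i,j).

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=i; j<N; j++) {                          <- j >= i: upper triangle only
         v    = EntryValue(i,j);
         ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/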
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required by the macros */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash) {
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j];
200           else             value = v[i+j*m];
201           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
202           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
203         } else if (in[j] < 0) continue;
204 #if defined(PETSC_USE_DEBUG)
205         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
206 #endif
207         else {  /* off-diag entry (B) */
208           if (mat->was_assembled) {
209             if (!baij->colmap) {
210               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
211             }
212 #if defined (PETSC_USE_CTABLE)
213             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
214             col  = col - 1;
215 #else
216             col = baij->colmap[in[j]/bs] - 1;
217 #endif
218             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
219               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
220               col =  in[j];
221               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
222               B = baij->B;
223               b = (Mat_SeqBAIJ*)(B)->data;
224               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
225               ba=b->a;
226             } else col += in[j]%bs;
227           } else col = in[j];
228           if (roworiented) value = v[i*n+j];
229           else             value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
236       if (!baij->donotstash) {
237         mat->assembled = PETSC_FALSE;
238         n_loc = 0;
239         for (j=0; j<n; j++) {
240           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
241           in_loc[n_loc] = in[j];
242           if (roworiented) {
243             v_loc[n_loc] = v[i*n+j];
244           } else {
245             v_loc[n_loc] = v[j*m+i];
246           }
247           n_loc++;
248         }
249         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
250       }
251     }
252   }
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
261   const MatScalar *value;
262   MatScalar       *barray=baij->barray;
263   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
264   PetscErrorCode  ierr;
265   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
266   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
267   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
268 
269   PetscFunctionBegin;
270   if (!barray) {
271     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
272     baij->barray = barray;
273   }
274 
275   if (roworiented) {
276     stepval = (n-1)*bs;
277   } else {
278     stepval = (m-1)*bs;
279   }
280   for (i=0; i<m; i++) {
281     if (im[i] < 0) continue;
282 #if defined(PETSC_USE_DEBUG)
283     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
284 #endif
285     if (im[i] >= rstart && im[i] < rend) {
286       row = im[i] - rstart;
287       for (j=0; j<n; j++) {
288         if (im[i] > in[j]) {
289           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
290           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
291         }
292         /* If only one block is passed per call (n == 1 row-oriented, or m == 1 column-oriented), a copy is not required */
293         if ((roworiented) && (n == 1)) {
294           barray = (MatScalar*) v + i*bs2;
295         } else if ((!roworiented) && (m == 1)) {
296           barray = (MatScalar*) v + j*bs2;
297         } else { /* Here a copy is required */
298           if (roworiented) {
299             value = v + i*(stepval+bs)*bs + j*bs;
300           } else {
301             value = v + j*(stepval+bs)*bs + i*bs;
302           }
303           for (ii=0; ii<bs; ii++,value+=stepval) {
304             for (jj=0; jj<bs; jj++) {
305               *barray++  = *value++;
306             }
307           }
308           barray -=bs2;
309         }
310 
311         if (in[j] >= cstart && in[j] < cend) {
312           col  = in[j] - cstart;
313           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
314         }
315         else if (in[j] < 0) continue;
316 #if defined(PETSC_USE_DEBUG)
317         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
318 #endif
319         else {
320           if (mat->was_assembled) {
321             if (!baij->colmap) {
322               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
323             }
324 
325 #if defined(PETSC_USE_DEBUG)
326 #if defined (PETSC_USE_CTABLE)
327             { PetscInt data;
328               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
329               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330             }
331 #else
332             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
333 #endif
334 #endif
335 #if defined (PETSC_USE_CTABLE)
336             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
337             col  = (col - 1)/bs;
338 #else
339             col = (baij->colmap[in[j]] - 1)/bs;
340 #endif
341             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
342               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
343               col =  in[j];
344             }
345           }
346           else col = in[j];
347           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
348         }
349       }
350     } else {
351       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
352       if (!baij->donotstash) {
353         if (roworiented) {
354           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         } else {
356           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
357         }
358       }
359     }
360   }
361   PetscFunctionReturn(0);
362 }
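
/*
   Caller-side usage sketch (assumed code): insert one bs x bs block at global
   block row ib and block column jb; for sbaij the caller keeps ib <= jb.
   With the default row-oriented layout, vals[] stores the block row by row.

     PetscScalar vals[9];                             <- e.g. bs = 3, so bs2 = 9 values
     ...fill vals...
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vals,ADD_VALUES);CHKERRQ(ierr);
*/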
363 
364 #undef __FUNCT__
365 #define __FUNCT__ "MatGetValues_MPISBAIJ"
366 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
367 {
368   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
369   PetscErrorCode ierr;
370   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
371   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
372 
373   PetscFunctionBegin;
374   for (i=0; i<m; i++) {
375     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
376     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
377     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
378       row = idxm[i] - bsrstart;
379       for (j=0; j<n; j++) {
380         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
381         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
382         if (idxn[j] >= bscstart && idxn[j] < bscend) {
383           col = idxn[j] - bscstart;
384           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
385         } else {
386           if (!baij->colmap) {
387             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388           }
389 #if defined (PETSC_USE_CTABLE)
390           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
391           data--;
392 #else
393           data = baij->colmap[idxn[j]/bs]-1;
394 #endif
395           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
396           else {
397             col  = data + idxn[j]%bs;
398             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
399           }
400         }
401       }
402     } else {
403       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
404     }
405   }
406   PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatNorm_MPISBAIJ"
411 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
412 {
413   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
414   PetscErrorCode ierr;
415   PetscReal      sum[2],*lnorm2;
416 
417   PetscFunctionBegin;
418   if (baij->size == 1) {
419     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
420   } else {
421     if (type == NORM_FROBENIUS) {
422       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
423       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
425       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
426       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
427       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
428       *norm = PetscSqrtReal(sum[0] + 2*sum[1]); /* B stores only one triangle of the off-diagonal part, so its square counts twice */
429       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
430     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
431       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
432       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
433       PetscReal    *rsum,*rsum2,vabs;
434       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
435       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
436       MatScalar    *v;
437 
438       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
439       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
440       /* Amat */
441       v = amat->a; jj = amat->j;
442       for (brow=0; brow<mbs; brow++) {
443         grow = bs*(rstart + brow);
444         nz = amat->i[brow+1] - amat->i[brow];
445         for (bcol=0; bcol<nz; bcol++) {
446           gcol = bs*(rstart + *jj); jj++;
447           for (col=0; col<bs; col++) {
448             for (row=0; row<bs; row++) {
449               vabs = PetscAbsScalar(*v); v++;
450               rsum[gcol+col] += vabs;
451               /* non-diagonal block */
452               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
453             }
454           }
455         }
456       }
457       /* Bmat */
458       v = bmat->a; jj = bmat->j;
459       for (brow=0; brow<mbs; brow++) {
460         grow = bs*(rstart + brow);
461         nz = bmat->i[brow+1] - bmat->i[brow];
462         for (bcol=0; bcol<nz; bcol++) {
463           gcol = bs*garray[*jj]; jj++;
464           for (col=0; col<bs; col++) {
465             for (row=0; row<bs; row++) {
466               vabs = PetscAbsScalar(*v); v++;
467               rsum[gcol+col] += vabs;
468               rsum[grow+row] += vabs;
469             }
470           }
471         }
472       }
473       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
474       *norm = 0.0;
475       for (col=0; col<mat->cmap->N; col++) {
476         if (rsum2[col] > *norm) *norm = rsum2[col];
477       }
478       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
479     } else {
480       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
481     }
482   }
483   PetscFunctionReturn(0);
484 }
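
/*
   Frobenius case above: MatNorm() of the diagonal block already accounts for
   the symmetric storage of A_d, while B holds only one triangle of the
   off-diagonal coupling, so globally

     ||A||_F^2 = sum_p ( ||A_d^p||_F^2 + 2*||B^p||_F^2 )

   which is what PetscSqrtReal(sum[0] + 2*sum[1]) evaluates after the
   reduction.
*/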
485 
486 #undef __FUNCT__
487 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
488 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
489 {
490   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
491   PetscErrorCode ierr;
492   PetscInt       nstash,reallocs;
493   InsertMode     addv;
494 
495   PetscFunctionBegin;
496   if (baij->donotstash || mat->nooffprocentries) {
497     PetscFunctionReturn(0);
498   }
499 
500   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES */
501   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
502   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
503   mat->insertmode = addv; /* in case this processor had no cache */
504 
505   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
506   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
510   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
516 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
517 {
518   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
519   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
520   PetscErrorCode ierr;
521   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
522   PetscInt       *row,*col;
523   PetscBool      other_disassembled;
524   PetscMPIInt    n;
525   PetscBool      r1,r2,r3;
526   MatScalar      *val;
527   InsertMode     addv = mat->insertmode;
528 
529   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
530   PetscFunctionBegin;
531 
532   if (!baij->donotstash &&  !mat->nooffprocentries) {
533     while (1) {
534       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
535       if (!flg) break;
536 
537       for (i=0; i<n;) {
538         /* Now identify the consecutive vals belonging to the same row */
539         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
540         if (j < n) ncols = j-i;
541         else       ncols = n-i;
542         /* Now assemble all these values with a single function call */
543         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
544         i = j;
545       }
546     }
547     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
548     /* Now process the block-stash. Since the values are stashed column-oriented,
549        clear the row-oriented flags before calling MatSetValuesBlocked(), and
550        restore the original flags afterwards */
551     r1 = baij->roworiented;
552     r2 = a->roworiented;
553     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
554     baij->roworiented = PETSC_FALSE;
555     a->roworiented    = PETSC_FALSE;
556     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
557     while (1) {
558       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
559       if (!flg) break;
560 
561       for (i=0; i<n;) {
562         /* Now identify the consecutive vals belonging to the same row */
563         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
564         if (j < n) ncols = j-i;
565         else       ncols = n-i;
566         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
567         i = j;
568       }
569     }
570     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
571     baij->roworiented = r1;
572     a->roworiented    = r2;
573   ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
574   }
575 
576   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
577   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
578 
579   /* determine if any processor has disassembled; if so we must
580      also disassemble ourselves, so that we may reassemble. */
581   /*
582      if the nonzero structure of submatrix B cannot change then we know that
583      no processor disassembled, thus we can skip this step
584   */
585   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
586     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
587     if (mat->was_assembled && !other_disassembled) {
588       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
589     }
590   }
591 
592   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
593     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
594   }
595   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
596   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
597   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
598 
599   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
600   baij->rowvalues = 0;
601 
602   PetscFunctionReturn(0);
603 }
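
/*
   The assembly pair above is driven from user code as in this sketch:

     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ...independent work may overlap the stash communication here...
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/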
604 
605 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
606 #undef __FUNCT__
607 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
608 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
609 {
610   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
611   PetscErrorCode    ierr;
612   PetscInt          bs = mat->rmap->bs;
613   PetscMPIInt       size = baij->size,rank = baij->rank;
614   PetscBool         iascii,isdraw;
615   PetscViewer       sviewer;
616   PetscViewerFormat format;
617 
618   PetscFunctionBegin;
619   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
620   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
621   if (iascii) {
622     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
623     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
624       MatInfo info;
625       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
626       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
633       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
634       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
635       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
636       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
637       PetscFunctionReturn(0);
638     } else if (format == PETSC_VIEWER_ASCII_INFO) {
639       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
640       PetscFunctionReturn(0);
641     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
642       PetscFunctionReturn(0);
643     }
644   }
645 
646   if (isdraw) {
647     PetscDraw  draw;
648     PetscBool  isnull;
649     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
650     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
651   }
652 
653   if (size == 1) {
654     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
655     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
656   } else {
657     /* assemble the entire matrix onto the first processor */
658     Mat          A;
659     Mat_SeqSBAIJ *Aloc;
660     Mat_SeqBAIJ  *Bloc;
661     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
662     MatScalar    *a;
663 
664     /* Should this be the same type as mat? */
665     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
666     if (!rank) {
667       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
668     } else {
669       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
670     }
671     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
672     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
673     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
674     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
675 
676     /* copy over the A part */
677     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
678     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
679     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
680 
681     for (i=0; i<mbs; i++) {
682       rvals[0] = bs*(baij->rstartbs + i);
683       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
684       for (j=ai[i]; j<ai[i+1]; j++) {
685         col = (baij->cstartbs+aj[j])*bs;
686         for (k=0; k<bs; k++) {
687           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
688           col++; a += bs;
689         }
690       }
691     }
692     /* copy over the B part */
693     Bloc = (Mat_SeqBAIJ*)baij->B->data;
694     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
695     for (i=0; i<mbs; i++) {
696 
697       rvals[0] = bs*(baij->rstartbs + i);
698       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
699       for (j=ai[i]; j<ai[i+1]; j++) {
700         col = baij->garray[aj[j]]*bs;
701         for (k=0; k<bs; k++) {
702           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
703           col++; a += bs;
704         }
705       }
706     }
707     ierr = PetscFree(rvals);CHKERRQ(ierr);
708     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
710     /*
711        Everyone has to participate in this call to draw the matrix since the
712        graphics waits are synchronized across all processors that share the PetscDraw object
713     */
714     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
715     if (!rank) {
716       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
717       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII() */
718       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
719       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
720     }
721     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
722     ierr = MatDestroy(&A);CHKERRQ(ierr);
723   }
724   PetscFunctionReturn(0);
725 }
726 
727 #undef __FUNCT__
728 #define __FUNCT__ "MatView_MPISBAIJ"
729 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
730 {
731   PetscErrorCode ierr;
732   PetscBool      iascii,isdraw,issocket,isbinary;
733 
734   PetscFunctionBegin;
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
739   if (iascii || isdraw || issocket || isbinary) {
740     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
741   } else {
742     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
743   }
744   PetscFunctionReturn(0);
745 }
746 
747 #undef __FUNCT__
748 #define __FUNCT__ "MatDestroy_MPISBAIJ"
749 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
750 {
751   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
752   PetscErrorCode ierr;
753 
754   PetscFunctionBegin;
755 #if defined(PETSC_USE_LOG)
756   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
757 #endif
758   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
759   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
760   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
761   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
762 #if defined (PETSC_USE_CTABLE)
763   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
764 #else
765   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
766 #endif
767   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
769   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
775   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
776   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
777   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
778   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
781   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
782 #if defined(PETSC_USE_REAL_MAT_SINGLE)
783   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
784 #endif
785   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
788   ierr = PetscFree(mat->data);CHKERRQ(ierr);
789 
790   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
795   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
796   PetscFunctionReturn(0);
797 }
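
/*
   The MatMult variants below exploit the symmetric storage: only the
   upper-triangular off-diagonal blocks are kept in B, so conceptually

     y = A_d x + B x_ghost + P(B^T x)

   where A_d is the local diagonal block and P(.) scatters the transpose
   contributions back to their owning processes.  The work vectors pack
   slvec0 = [x ; B^T x] and slvec1 = [A_d x ; 0]; a single ADD_VALUES scatter
   of slvec0 into slvec1 delivers the transpose contributions and the ghost
   values of x at the same time, and one final multadd with B completes the
   product.
*/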
798 
799 #undef __FUNCT__
800 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
801 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
802 {
803   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
804   PetscErrorCode ierr;
805   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
806   PetscScalar    *x,*from;
807 
808   PetscFunctionBegin;
809   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
810   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
811 
812   /* diagonal part */
813   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
814   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
815 
816   /* subdiagonal part */
817   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
818 
819   /* copy x into the vec slvec0 */
820   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
821   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
822 
823   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
824   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
825   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
826 
827   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829   /* superdiagonal part */
830   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
831   PetscFunctionReturn(0);
832 }
833 
834 #undef __FUNCT__
835 #define __FUNCT__ "MatMult_MPISBAIJ"
836 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
837 {
838   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
839   PetscErrorCode ierr;
840   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
841   PetscScalar    *x,*from;
842 
843   PetscFunctionBegin;
844   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
845   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
846 
847   /* diagonal part */
848   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
849   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
850 
851   /* subdiagonal part */
852   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
853 
854   /* copy x into the vec slvec0 */
855   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
856   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
857 
858   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
859   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
860   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
861 
862   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864   /* superdiagonal part */
865   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
866   PetscFunctionReturn(0);
867 }
868 
869 #undef __FUNCT__
870 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
871 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
872 {
873   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
874   PetscErrorCode ierr;
875   PetscInt       nt;
876 
877   PetscFunctionBegin;
878   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
879   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
880 
881   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
882   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
883 
884   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   /* do diagonal part */
886   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
887   /* do superdiagonal part */
888   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
890   /* do subdiagonal part */
891   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
892   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
893   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
894 
895   PetscFunctionReturn(0);
896 }
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
900 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
901 {
902   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
903   PetscErrorCode ierr;
904   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
905   PetscScalar    *x,*from,zero=0.0;
906 
907   PetscFunctionBegin;
908   /*
909   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
910   PetscSynchronizedFlush(((PetscObject)A)->comm);
911   */
912   /* diagonal part */
913   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
914   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
915 
916   /* subdiagonal part */
917   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
918 
919   /* copy x into the vec slvec0 */
920   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
921   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
922   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
923   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
924 
925   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
927   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928 
929   /* superdiagonal part */
930   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
931 
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
937 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
938 {
939   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
940   PetscErrorCode ierr;
941 
942   PetscFunctionBegin;
943   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944   /* do diagonal part */
945   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
946   /* do superdiagonal part */
947   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
948   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
949 
950   /* do subdiagonal part */
951   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
952   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
953   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
954 
955   PetscFunctionReturn(0);
956 }
957 
958 /*
959   This only works correctly for square matrices where the subblock a->A is the
960    diagonal block
961 */
962 #undef __FUNCT__
963 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
964 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
971   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
972   PetscFunctionReturn(0);
973 }
974 
975 #undef __FUNCT__
976 #define __FUNCT__ "MatScale_MPISBAIJ"
977 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
978 {
979   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
980   PetscErrorCode ierr;
981 
982   PetscFunctionBegin;
983   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
984   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
985   PetscFunctionReturn(0);
986 }
987 
988 #undef __FUNCT__
989 #define __FUNCT__ "MatGetRow_MPISBAIJ"
990 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
991 {
992   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
993   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
994   PetscErrorCode ierr;
995   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
996   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
997   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
998 
999   PetscFunctionBegin;
1000   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1001   mat->getrowactive = PETSC_TRUE;
1002 
1003   if (!mat->rowvalues && (idx || v)) {
1004     /*
1005         allocate enough space to hold information from the longest row.
1006     */
1007     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1008     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1009     PetscInt     max = 1,mbs = mat->mbs,tmp;
1010     for (i=0; i<mbs; i++) {
1011       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1012       if (max < tmp) { max = tmp; }
1013     }
1014     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1015   }
1016 
1017   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1018   lrow = row - brstart;  /* local row index */
1019 
1020   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1021   if (!v)   {pvA = 0; pvB = 0;}
1022   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1023   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1024   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1025   nztot = nzA + nzB;
1026 
1027   cmap  = mat->garray;
1028   if (v  || idx) {
1029     if (nztot) {
1030       /* Sort by increasing column numbers, assuming A and B already sorted */
1031       PetscInt imark = -1;
1032       if (v) {
1033         *v = v_p = mat->rowvalues;
1034         for (i=0; i<nzB; i++) {
1035           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1036           else break;
1037         }
1038         imark = i;
1039         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1040         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1041       }
1042       if (idx) {
1043         *idx = idx_p = mat->rowindices;
1044         if (imark > -1) {
1045           for (i=0; i<imark; i++) {
1046             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1047           }
1048         } else {
1049           for (i=0; i<nzB; i++) {
1050             if (cmap[cworkB[i]/bs] < cstart)
1051               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1052             else break;
1053           }
1054           imark = i;
1055         }
1056         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1057         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1058       }
1059     } else {
1060       if (idx) *idx = 0;
1061       if (v)   *v   = 0;
1062     }
1063   }
1064   *nz = nztot;
1065   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1066   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1067   PetscFunctionReturn(0);
1068 }
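
/*
   Sketch of the merge above for bs = 1 (numbers assumed for illustration):
   with cstart = 4, A columns {4,7} and B columns mapping through garray to
   global {1,9}, the returned row lists columns {1,4,7,9}: B entries whose
   global column precedes cstart come first, then all A entries, then the
   remaining B entries.
*/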
1069 
1070 #undef __FUNCT__
1071 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1072 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1073 {
1074   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1075 
1076   PetscFunctionBegin;
1077   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1078   baij->getrowactive = PETSC_FALSE;
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1084 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1085 {
1086   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1087   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1088 
1089   PetscFunctionBegin;
1090   aA->getrow_utriangular = PETSC_TRUE;
1091   PetscFunctionReturn(0);
1092 }
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1095 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1096 {
1097   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1098   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1099 
1100   PetscFunctionBegin;
1101   aA->getrow_utriangular = PETSC_FALSE;
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1107 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1108 {
1109   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1110   PetscErrorCode ierr;
1111 
1112   PetscFunctionBegin;
1113   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1114   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1120 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1121 {
1122   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1123   PetscErrorCode ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1127   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1133 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1134 {
1135   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1136   PetscErrorCode ierr;
1137 
1138   PetscFunctionBegin;
1139   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1140   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1146 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1147 {
1148   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1149   Mat            A = a->A,B = a->B;
1150   PetscErrorCode ierr;
1151   PetscReal      isend[5],irecv[5];
1152 
1153   PetscFunctionBegin;
1154   info->block_size     = (PetscReal)matin->rmap->bs;
1155   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1156   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1157   isend[3] = info->memory;  isend[4] = info->mallocs;
1158   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1159   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1160   isend[3] += info->memory;  isend[4] += info->mallocs;
1161   if (flag == MAT_LOCAL) {
1162     info->nz_used      = isend[0];
1163     info->nz_allocated = isend[1];
1164     info->nz_unneeded  = isend[2];
1165     info->memory       = isend[3];
1166     info->mallocs      = isend[4];
1167   } else if (flag == MAT_GLOBAL_MAX) {
1168     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1169     info->nz_used      = irecv[0];
1170     info->nz_allocated = irecv[1];
1171     info->nz_unneeded  = irecv[2];
1172     info->memory       = irecv[3];
1173     info->mallocs      = irecv[4];
1174   } else if (flag == MAT_GLOBAL_SUM) {
1175     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1176     info->nz_used      = irecv[0];
1177     info->nz_allocated = irecv[1];
1178     info->nz_unneeded  = irecv[2];
1179     info->memory       = irecv[3];
1180     info->mallocs      = irecv[4];
1181   } else {
1182     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1183   }
1184   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1185   info->fill_ratio_needed = 0;
1186   info->factor_mallocs    = 0;
1187   PetscFunctionReturn(0);
1188 }
1189 
1190 #undef __FUNCT__
1191 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1192 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1193 {
1194   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1195   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1196   PetscErrorCode ierr;
1197 
1198   PetscFunctionBegin;
1199   switch (op) {
1200   case MAT_NEW_NONZERO_LOCATIONS:
1201   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1202   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1203   case MAT_KEEP_NONZERO_PATTERN:
1204   case MAT_NEW_NONZERO_LOCATION_ERR:
1205     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1206     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1207     break;
1208   case MAT_ROW_ORIENTED:
1209     a->roworiented = flg;
1210     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1211     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1212     break;
1213   case MAT_NEW_DIAGONALS:
1214     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1215     break;
1216   case MAT_IGNORE_OFF_PROC_ENTRIES:
1217     a->donotstash = flg;
1218     break;
1219   case MAT_USE_HASH_TABLE:
1220     a->ht_flag = flg;
1221     break;
1222   case MAT_HERMITIAN:
1223     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1224     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1225     if (flg) A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1226     break;
1227   case MAT_SPD:
1228     A->spd_set                         = PETSC_TRUE;
1229     A->spd                             = flg;
1230     if (flg) {
1231       A->symmetric                     = PETSC_TRUE;
1232       A->structurally_symmetric        = PETSC_TRUE;
1233       A->symmetric_set                 = PETSC_TRUE;
1234       A->structurally_symmetric_set    = PETSC_TRUE;
1235     }
1236     break;
1237   case MAT_SYMMETRIC:
1238     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1239     break;
1240   case MAT_STRUCTURALLY_SYMMETRIC:
1241     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1242     break;
1243   case MAT_SYMMETRY_ETERNAL:
1244     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1245     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1246     break;
1247   case MAT_IGNORE_LOWER_TRIANGULAR:
1248     aA->ignore_ltriangular = flg;
1249     break;
1250   case MAT_ERROR_LOWER_TRIANGULAR:
1251     aA->ignore_ltriangular = flg;
1252     break;
1253   case MAT_GETROW_UPPERTRIANGULAR:
1254     aA->getrow_utriangular = flg;
1255     break;
1256   default:
1257     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1258   }
1259   PetscFunctionReturn(0);
1260 }
1261 
1262 #undef __FUNCT__
1263 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1264 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1265 {
1266   PetscErrorCode ierr;
1267   PetscFunctionBegin;
1268   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1269     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1270   }
1271   PetscFunctionReturn(0);
1272 }
1273 
1274 #undef __FUNCT__
1275 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1276 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1277 {
1278   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1279   Mat            a=baij->A, b=baij->B;
1280   PetscErrorCode ierr;
1281   PetscInt       nv,m,n;
1282   PetscBool      flg;
1283 
1284   PetscFunctionBegin;
1285   if (ll != rr) {
1286     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1287     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1288   }
1289   if (!ll) PetscFunctionReturn(0);
1290 
1291   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1292   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1293 
1294   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1295   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1296 
1297   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1298 
1299   /* left diagonalscale the off-diagonal part */
1300   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1301 
1302   /* scale the diagonal part */
1303   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1304 
1305   /* right diagonalscale the off-diagonal part */
1306   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1307   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1313 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1314 {
1315   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatEqual_MPISBAIJ"
1327 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1328 {
1329   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1330   Mat            a,b,c,d;
1331   PetscBool      flg;
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   a = matA->A; b = matA->B;
1336   c = matB->A; d = matB->B;
1337 
1338   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1339   if (flg) {
1340     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1341   }
1342   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatCopy_MPISBAIJ"
1348 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1349 {
1350   PetscErrorCode ierr;
1351   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1352   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1353 
1354   PetscFunctionBegin;
1355   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1356   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1357     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1358     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1359     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1360   } else {
1361     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1362     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1369 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1370 {
1371   PetscErrorCode ierr;
1372 
1373   PetscFunctionBegin;
1374   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1380 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1381 {
1382   PetscErrorCode ierr;
1383   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1384   PetscBLASInt   bnz,one=1;
1385   Mat_SeqSBAIJ   *xa,*ya;
1386   Mat_SeqBAIJ    *xb,*yb;
1387 
1388   PetscFunctionBegin;
1389   if (str == SAME_NONZERO_PATTERN) {
1390     PetscScalar alpha = a;
1391     xa = (Mat_SeqSBAIJ *)xx->A->data;
1392     ya = (Mat_SeqSBAIJ *)yy->A->data;
1393     bnz = PetscBLASIntCast(xa->nz);
1394     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1395     xb = (Mat_SeqBAIJ *)xx->B->data;
1396     yb = (Mat_SeqBAIJ *)yy->B->data;
1397     bnz = PetscBLASIntCast(xb->nz);
1398     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1399   } else {
1400     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1401     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1402     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1409 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1410 {
1411   PetscErrorCode ierr;
1412   PetscInt       i;
1413   PetscBool      flg;
1414 
1415   PetscFunctionBegin;
1416   for (i=0; i<n; i++) {
1417     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1418     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1419   }
1420   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 
1425 /* -------------------------------------------------------------------*/
1426 static struct _MatOps MatOps_Values = {
1427        MatSetValues_MPISBAIJ,
1428        MatGetRow_MPISBAIJ,
1429        MatRestoreRow_MPISBAIJ,
1430        MatMult_MPISBAIJ,
1431 /* 4*/ MatMultAdd_MPISBAIJ,
1432        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose versions */
1433        MatMultAdd_MPISBAIJ,
1434        0,
1435        0,
1436        0,
1437 /*10*/ 0,
1438        0,
1439        0,
1440        MatSOR_MPISBAIJ,
1441        MatTranspose_MPISBAIJ,
1442 /*15*/ MatGetInfo_MPISBAIJ,
1443        MatEqual_MPISBAIJ,
1444        MatGetDiagonal_MPISBAIJ,
1445        MatDiagonalScale_MPISBAIJ,
1446        MatNorm_MPISBAIJ,
1447 /*20*/ MatAssemblyBegin_MPISBAIJ,
1448        MatAssemblyEnd_MPISBAIJ,
1449        MatSetOption_MPISBAIJ,
1450        MatZeroEntries_MPISBAIJ,
1451 /*24*/ 0,
1452        0,
1453        0,
1454        0,
1455        0,
1456 /*29*/ MatSetUp_MPISBAIJ,
1457        0,
1458        0,
1459        0,
1460        0,
1461 /*34*/ MatDuplicate_MPISBAIJ,
1462        0,
1463        0,
1464        0,
1465        0,
1466 /*39*/ MatAXPY_MPISBAIJ,
1467        MatGetSubMatrices_MPISBAIJ,
1468        MatIncreaseOverlap_MPISBAIJ,
1469        MatGetValues_MPISBAIJ,
1470        MatCopy_MPISBAIJ,
1471 /*44*/ 0,
1472        MatScale_MPISBAIJ,
1473        0,
1474        0,
1475        0,
1476 /*49*/ 0,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*54*/ 0,
1482        0,
1483        MatSetUnfactored_MPISBAIJ,
1484        0,
1485        MatSetValuesBlocked_MPISBAIJ,
1486 /*59*/ 0,
1487        0,
1488        0,
1489        0,
1490        0,
1491 /*64*/ 0,
1492        0,
1493        0,
1494        0,
1495        0,
1496 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*74*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*79*/ 0,
1507        0,
1508        0,
1509        0,
1510        MatLoad_MPISBAIJ,
1511 /*84*/ 0,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*89*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*94*/ 0,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*99*/ 0,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*104*/0,
1532        MatRealPart_MPISBAIJ,
1533        MatImaginaryPart_MPISBAIJ,
1534        MatGetRowUpperTriangular_MPISBAIJ,
1535        MatRestoreRowUpperTriangular_MPISBAIJ,
1536 /*109*/0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*114*/0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*119*/0,
1547        0,
1548        0,
1549        0
1550 };
1551 
1552 
1553 EXTERN_C_BEGIN
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1556 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1557 {
1558   PetscFunctionBegin;
1559   *a = ((Mat_MPISBAIJ *)A->data)->A;
1560   PetscFunctionReturn(0);
1561 }
1562 EXTERN_C_END
1563 
1564 EXTERN_C_BEGIN
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1567 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1568 {
1569   Mat_MPISBAIJ   *b;
1570   PetscErrorCode ierr;
1571   PetscInt       i,mbs,Mbs;
1572 
1573   PetscFunctionBegin;
1574   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1575   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1576   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1577   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1578 
1579   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1580   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1581   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1582   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1583   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1584 
1585   if (d_nnz) {
1586     for (i=0; i<B->rmap->n/bs; i++) {
1587       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1588     }
1589   }
1590   if (o_nnz) {
1591     for (i=0; i<B->rmap->n/bs; i++) {
1592       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1593     }
1594   }
1595 
1596   b   = (Mat_MPISBAIJ*)B->data;
1597   mbs = B->rmap->n/bs;
1598   Mbs = B->rmap->N/bs;
1599   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1600 
1601   B->rmap->bs  = bs;
1602   b->bs2 = bs*bs;
1603   b->mbs = mbs;
1604   b->nbs = mbs;
1605   b->Mbs = Mbs;
1606   b->Nbs = Mbs;
1607 
1608   for (i=0; i<=b->size; i++) {
1609     b->rangebs[i] = B->rmap->range[i]/bs;
1610   }
1611   b->rstartbs = B->rmap->rstart/bs;
1612   b->rendbs   = B->rmap->rend/bs;
1613 
1614   b->cstartbs = B->cmap->rstart/bs;
1615   b->cendbs   = B->cmap->rend/bs;
1616 
1617   if (!B->preallocated) {
1618     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1619     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1620     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1621     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1622     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1623     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1624     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1625     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1626     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1627   }
1628 
1629   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1630   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1631   B->preallocated = PETSC_TRUE;
1632   PetscFunctionReturn(0);
1633 }
1634 EXTERN_C_END
1635 
1636 EXTERN_C_BEGIN
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1639 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1640 {
1641   PetscInt       m,rstart,cstart,cend;
1642   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1643   const PetscInt *JJ=0;
1644   PetscScalar    *values=0;
1645   PetscErrorCode ierr;
1646 
1647   PetscFunctionBegin;
1648 
1649   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1650   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1651   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1652   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1653   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1654   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1655   m      = B->rmap->n/bs;
1656   rstart = B->rmap->rstart/bs;
1657   cstart = B->cmap->rstart/bs;
1658   cend   = B->cmap->rend/bs;
1659 
1660   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1661   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
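  /* Count, for each local block row, how many block columns fall in [cstart,cend)
     (diagonal portion) versus outside it (off-diagonal portion); the column indices
     in jj are assumed to be sorted within each row */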
1662   for (i=0; i<m; i++) {
1663     nz = ii[i+1] - ii[i];
1664     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1665     nz_max = PetscMax(nz_max,nz);
1666     JJ  = jj + ii[i];
1667     for (j=0; j<nz; j++) {
1668       if (*JJ >= cstart) break;
1669       JJ++;
1670     }
1671     d = 0;
1672     for (; j<nz; j++) {
1673       if (*JJ++ >= cend) break;
1674       d++;
1675     }
1676     d_nnz[i] = d;
1677     o_nnz[i] = nz - d;
1678   }
1679   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1680   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1681 
1682   values = (PetscScalar*)V;
1683   if (!values) {
1684     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1685     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1686   }
1687   for (i=0; i<m; i++) {
1688     PetscInt          row    = i + rstart;
1689     PetscInt          ncols  = ii[i+1] - ii[i];
1690     const PetscInt    *icols = jj + ii[i];
1691     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1692     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1693   }
1694 
1695   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1696   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 EXTERN_C_END
1702 
1703 EXTERN_C_BEGIN
1704 #if defined(PETSC_HAVE_MUMPS)
1705 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1706 #endif
1707 #if defined(PETSC_HAVE_PASTIX)
1708 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1709 #endif
1710 EXTERN_C_END
1711 
1712 /*MC
1713    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1714    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1715    the matrix is stored.
1716 
1717   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric
1718   you can call MatSetOption(A, MAT_HERMITIAN, PETSC_TRUE) once the matrix has been assembled.
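
  For example, a minimal sketch (assuming A is a MATMPISBAIJ matrix built with complex scalars):
.vb
  MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);
  MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);
  MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);  /* must follow assembly for this matrix type */
.ve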
1719 
1720    Options Database Keys:
1721 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1722 
1723   Level: beginner
1724 
1725 .seealso: MatCreateSBAIJ()
1726 M*/
1727 
1728 EXTERN_C_BEGIN
1729 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1730 EXTERN_C_END
1731 
1732 EXTERN_C_BEGIN
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatCreate_MPISBAIJ"
1735 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1736 {
1737   Mat_MPISBAIJ   *b;
1738   PetscErrorCode ierr;
1739   PetscBool      flg;
1740 
1741   PetscFunctionBegin;
1742 
1743   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1744   B->data = (void*)b;
1745   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1746 
1747   B->ops->destroy    = MatDestroy_MPISBAIJ;
1748   B->ops->view       = MatView_MPISBAIJ;
1749   B->assembled       = PETSC_FALSE;
1750 
1751   B->insertmode = NOT_SET_VALUES;
1752   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1753   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1754 
1755   /* build local table of row and column ownerships */
1756   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1757 
1758   /* build cache for off array entries formed */
1759   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1760   b->donotstash  = PETSC_FALSE;
1761   b->colmap      = PETSC_NULL;
1762   b->garray      = PETSC_NULL;
1763   b->roworiented = PETSC_TRUE;
1764 
1765   /* stuff used in block assembly */
1766   b->barray       = 0;
1767 
1768   /* stuff used for matrix vector multiply */
1769   b->lvec         = 0;
1770   b->Mvctx        = 0;
1771   b->slvec0       = 0;
1772   b->slvec0b      = 0;
1773   b->slvec1       = 0;
1774   b->slvec1a      = 0;
1775   b->slvec1b      = 0;
1776   b->sMvctx       = 0;
1777 
1778   /* stuff for MatGetRow() */
1779   b->rowindices   = 0;
1780   b->rowvalues    = 0;
1781   b->getrowactive = PETSC_FALSE;
1782 
1783   /* hash table stuff */
1784   b->ht           = 0;
1785   b->hd           = 0;
1786   b->ht_size      = 0;
1787   b->ht_flag      = PETSC_FALSE;
1788   b->ht_fact      = 0;
1789   b->ht_total_ct  = 0;
1790   b->ht_insert_ct = 0;
1791 
1792   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1793   b->ijonly       = PETSC_FALSE;
1794 
1795   b->in_loc       = 0;
1796   b->v_loc        = 0;
1797   b->n_loc        = 0;
1798   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1799     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1800     if (flg) {
1801       PetscReal fact = 1.39;
1802       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1803       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1804       if (fact <= 1.0) fact = 1.39;
1805       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1806       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1807     }
1808   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1809 
1810 #if defined(PETSC_HAVE_PASTIX)
1811   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1812                                            "MatGetFactor_mpisbaij_pastix",
1813                                            MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1814 #endif
1815 #if defined(PETSC_HAVE_MUMPS)
1816   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1817                                      "MatGetFactor_sbaij_mumps",
1818                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1819 #endif
1820   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1821                                      "MatStoreValues_MPISBAIJ",
1822                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1823   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1824                                      "MatRetrieveValues_MPISBAIJ",
1825                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1826   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1827                                      "MatGetDiagonalBlock_MPISBAIJ",
1828                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1830                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1831                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1833                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1834                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1836                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1837                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1838 
1839   B->symmetric                  = PETSC_TRUE;
1840   B->structurally_symmetric     = PETSC_TRUE;
1841   B->symmetric_set              = PETSC_TRUE;
1842   B->structurally_symmetric_set = PETSC_TRUE;
1843   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1844   PetscFunctionReturn(0);
1845 }
1846 EXTERN_C_END
1847 
1848 /*MC
1849    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1850 
1851    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1852    and MATMPISBAIJ otherwise.
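
   For example, a minimal creation sketch (comm, m, n, M, N are assumed values):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetType(A,MATSBAIJ);
   MatSetFromOptions(A);
.ve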
1853 
1854    Options Database Keys:
1855 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1856 
1857   Level: beginner
1858 
1859 .seealso: MatCreateSBAIJ(),MATSEQSBAIJ,MATMPISBAIJ
1860 M*/
1861 
1862 #undef __FUNCT__
1863 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1864 /*@C
1865    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1866    the user should preallocate the matrix storage by setting the parameters
1867    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1868    performance can be increased by more than a factor of 50.
1869 
1870    Collective on Mat
1871 
1872    Input Parameters:
1873 +  A - the matrix
1874 .  bs   - size of block
1875 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1876            submatrix  (same for all local rows)
1877 .  d_nnz - array containing the number of block nonzeros in the various block rows
1878            in the upper triangular and diagonal part of the diagonal portion of the local
1879            submatrix (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1880            for the diagonal entry and set a value even if it is zero.
1881 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1882            submatrix (same for all local rows).
1883 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1884            off-diagonal portion of the local submatrix that is right of the diagonal
1885            (possibly different for each block row) or PETSC_NULL.
1886 
1887 
1888    Options Database Keys:
1889 +   -mat_no_unroll - uses code that does not unroll the loops in the
1890                      block calculations (much slower)
1891 -   -mat_block_size - size of the blocks to use
1892 
1893    Notes:
1894 
1895    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1896    then it must be used on all processors that share the object for that argument.
1897 
1898    If the *_nnz parameter is given then the *_nz parameter is ignored
1899 
1900    Storage Information:
1901    For a square global matrix we define each processor's diagonal portion
1902    to be its local rows and the corresponding columns (a square submatrix);
1903    each processor's off-diagonal portion encompasses the remainder of the
1904    local matrix (a rectangular submatrix).
1905 
1906    The user can specify preallocated storage for the diagonal part of
1907    the local submatrix with either d_nz or d_nnz (not both).  Set
1908    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1909    memory allocation.  Likewise, specify preallocated storage for the
1910    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1911 
1912    You can call MatGetInfo() to get information on how effective the preallocation was;
1913    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1914    You can also run with the option -info and look for messages with the string
1915    malloc in them to see if additional memory allocation was needed.
1916 
1917    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1918    the figure below we depict these three local rows and all columns (0-11).
1919 
1920 .vb
1921            0 1 2 3 4 5 6 7 8 9 10 11
1922           -------------------
1923    row 3  |  . . . d d d o o o o o o
1924    row 4  |  . . . d d d o o o o o o
1925    row 5  |  . . . d d d o o o o o o
1926           -------------------
1927 .ve
1928 
1929    Thus, any entries in the d locations are stored in the d (diagonal)
1930    submatrix, and any entries in the o locations are stored in the
1931    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1932    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1933 
1934    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1935    plus the diagonal part of the d matrix,
1936    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1937 
1938    In general, for PDE problems in which most nonzeros are near the diagonal,
1939    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1940    or you will get TERRIBLE performance; see the users' manual chapter on
1941    matrices.
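
   For example, an illustrative sketch with block size 2, three local block rows,
   and hypothetical per-block-row counts:
.vb
   Mat      A;
   PetscInt d_nnz[3] = {2,1,1};   /* upper triangular + diagonal blocks per block row */
   PetscInt o_nnz[3] = {1,1,0};   /* off-diagonal blocks per block row */
   MatCreate(PETSC_COMM_WORLD,&A);
   MatSetSizes(A,6,6,PETSC_DETERMINE,PETSC_DETERMINE);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);
.ve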
1942 
1943    Level: intermediate
1944 
1945 .keywords: matrix, block, aij, compressed row, sparse, parallel
1946 
1947 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1948 @*/
1949 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1950 {
1951   PetscErrorCode ierr;
1952 
1953   PetscFunctionBegin;
1954   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1955   PetscValidType(B,1);
1956   PetscValidLogicalCollectiveInt(B,bs,2);
1957   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1958   PetscFunctionReturn(0);
1959 }
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatCreateSBAIJ"
1963 /*@C
1964    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1965    (block compressed row).  For good matrix assembly performance
1966    the user should preallocate the matrix storage by setting the parameters
1967    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1968    performance can be increased by more than a factor of 50.
1969 
1970    Collective on MPI_Comm
1971 
1972    Input Parameters:
1973 +  comm - MPI communicator
1974 .  bs   - size of block
1975 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1976            This value should be the same as the local size used in creating the
1977            y vector for the matrix-vector product y = Ax.
1978 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1979            This value should be the same as the local size used in creating the
1980            x vector for the matrix-vector product y = Ax.
1981 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1982 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1983 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1984            submatrix  (same for all local rows)
1985 .  d_nnz - array containing the number of block nonzeros in the various block rows
1986            in the upper triangular and diagonal part of the diagonal portion of the local
1987            submatrix (possibly different for each block row) or PETSC_NULL.
1988            If you plan to factor the matrix you must leave room for the diagonal entry and
1989            set its value even if it is zero.
1990 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1991            submatrix (same for all local rows).
1992 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1993            off-diagonal portion of the local submatrix (possibly different for
1994            each block row) or PETSC_NULL.
1995 
1996    Output Parameter:
1997 .  A - the matrix
1998 
1999    Options Database Keys:
2000 +   -mat_no_unroll - uses code that does not unroll the loops in the
2001                      block calculations (much slower)
2002 .   -mat_block_size - size of the blocks to use
2003 -   -mat_mpi - use the parallel matrix data structures even on one processor
2004                (defaults to using SeqBAIJ format on one processor)
2005 
2006    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2007    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2008    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2009 
2010    Notes:
2011    The number of rows and columns must be divisible by blocksize.
2012    This matrix type does not support complex Hermitian operations.
2013 
2014    The user MUST specify either the local or global matrix dimensions
2015    (possibly both).
2016 
2017    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2018    then it must be used on all processors that share the object for that argument.
2019 
2020    If the *_nnz parameter is given then the *_nz parameter is ignored
2021 
2022    Storage Information:
2023    For a square global matrix we define each processor's diagonal portion
2024    to be its local rows and the corresponding columns (a square submatrix);
2025    each processor's off-diagonal portion encompasses the remainder of the
2026    local matrix (a rectangular submatrix).
2027 
2028    The user can specify preallocated storage for the diagonal part of
2029    the local submatrix with either d_nz or d_nnz (not both).  Set
2030    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2031    memory allocation.  Likewise, specify preallocated storage for the
2032    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2033 
2034    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2035    the figure below we depict these three local rows and all columns (0-11).
2036 
2037 .vb
2038            0 1 2 3 4 5 6 7 8 9 10 11
2039           -------------------
2040    row 3  |  . . . d d d o o o o o o
2041    row 4  |  . . . d d d o o o o o o
2042    row 5  |  . . . d d d o o o o o o
2043           -------------------
2044 .ve
2045 
2046    Thus, any entries in the d locations are stored in the d (diagonal)
2047    submatrix, and any entries in the o locations are stored in the
2048    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2049    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2050 
2051    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2052    plus the diagonal part of the d matrix,
2053    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2054    In general, for PDE problems in which most nonzeros are near the diagonal,
2055    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2056    or you will get TERRIBLE performance; see the users' manual chapter on
2057    matrices.
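
   For example, a minimal sketch with block size 2, global size 100, and assumed
   uniform preallocation of 5 diagonal-portion blocks and 1 off-diagonal-portion
   block per block row:
.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,PETSC_NULL,1,PETSC_NULL,&A);
.ve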
2058 
2059    Level: intermediate
2060 
2061 .keywords: matrix, block, aij, compressed row, sparse, parallel
2062 
2063 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2064 @*/
2065 
2066 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2067 {
2068   PetscErrorCode ierr;
2069   PetscMPIInt    size;
2070 
2071   PetscFunctionBegin;
2072   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2073   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2074   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2075   if (size > 1) {
2076     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2077     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2078   } else {
2079     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2080     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2081   }
2082   PetscFunctionReturn(0);
2083 }
2084 
2085 
2086 #undef __FUNCT__
2087 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2088 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2089 {
2090   Mat            mat;
2091   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2092   PetscErrorCode ierr;
2093   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2094   PetscScalar    *array;
2095 
2096   PetscFunctionBegin;
2097   *newmat       = 0;
2098   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2099   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2100   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2101   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2102   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2103   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2104 
2105   mat->factortype   = matin->factortype;
2106   mat->preallocated = PETSC_TRUE;
2107   mat->assembled    = PETSC_TRUE;
2108   mat->insertmode   = NOT_SET_VALUES;
2109 
2110   a = (Mat_MPISBAIJ*)mat->data;
2111   a->bs2   = oldmat->bs2;
2112   a->mbs   = oldmat->mbs;
2113   a->nbs   = oldmat->nbs;
2114   a->Mbs   = oldmat->Mbs;
2115   a->Nbs   = oldmat->Nbs;
2116 
2117 
2118   a->size         = oldmat->size;
2119   a->rank         = oldmat->rank;
2120   a->donotstash   = oldmat->donotstash;
2121   a->roworiented  = oldmat->roworiented;
2122   a->rowindices   = 0;
2123   a->rowvalues    = 0;
2124   a->getrowactive = PETSC_FALSE;
2125   a->barray       = 0;
2126   a->rstartbs    = oldmat->rstartbs;
2127   a->rendbs      = oldmat->rendbs;
2128   a->cstartbs    = oldmat->cstartbs;
2129   a->cendbs      = oldmat->cendbs;
2130 
2131   /* hash table stuff */
2132   a->ht           = 0;
2133   a->hd           = 0;
2134   a->ht_size      = 0;
2135   a->ht_flag      = oldmat->ht_flag;
2136   a->ht_fact      = oldmat->ht_fact;
2137   a->ht_total_ct  = 0;
2138   a->ht_insert_ct = 0;
2139 
2140   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2141   if (oldmat->colmap) {
2142 #if defined (PETSC_USE_CTABLE)
2143     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2144 #else
2145     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2146     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2147     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2148 #endif
2149   } else a->colmap = 0;
2150 
2151   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2152     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2153     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2154     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2155   } else a->garray = 0;
2156 
2157   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2158   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2159   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2160   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2161   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2162 
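  /* recreate the sub-vector views: slvec1a aliases the leading bs*mbs entries of slvec1,
     while slvec1b and slvec0b alias the trailing (ghost) entries of slvec1 and slvec0 */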
2163   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2164   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2165   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2166   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2167 
2168   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2169   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2170   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2171   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2172   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2173   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2174   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2175   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2177   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2178   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2179   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2180   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2181 
2182   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2183   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2184   a->sMvctx = oldmat->sMvctx;
2185   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2186 
2187   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2188   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2189   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2190   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2191   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2192   *newmat = mat;
2193   PetscFunctionReturn(0);
2194 }
2195 
2196 #undef __FUNCT__
2197 #define __FUNCT__ "MatLoad_MPISBAIJ"
2198 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2199 {
2200   PetscErrorCode ierr;
2201   PetscInt       i,nz,j,rstart,rend;
2202   PetscScalar    *vals,*buf;
2203   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2204   MPI_Status     status;
2205   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2206   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2207   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2208   PetscInt       bs=1,Mbs,mbs,extra_rows;
2209   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2210   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2211   int            fd;
2212 
2213   PetscFunctionBegin;
2214   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2215     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2216   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2217 
2218   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2219   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2220   if (!rank) {
2221     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2222     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2223     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2224     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2225   }
2226 
2227   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2228 
2229   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2230   M = header[1]; N = header[2];
2231 
2232   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2233   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2234   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2235 
2236   /* If global sizes are set, check if they are consistent with that given in the file */
2237   if (sizesset) {
2238     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2239   }
2240   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2241   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2242 
2243   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2244 
2245   /*
2246      This code adds extra rows to make sure the number of rows is
2247      divisible by the blocksize
2248   */
2249   Mbs        = M/bs;
2250   extra_rows = bs - M + bs*(Mbs);
2251   if (extra_rows == bs) extra_rows = 0;
2252   else                  Mbs++;
2253   if (extra_rows && !rank) {
2254     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2255   }
2256 
2257   /* determine ownership of all rows */
2258   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2259     mbs        = Mbs/size + ((Mbs % size) > rank);
2260     m          = mbs*bs;
2261   } else { /* User Set */
2262     m          = newmat->rmap->n;
2263     mbs        = m/bs;
2264   }
2265   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2266   mmbs       = PetscMPIIntCast(mbs);
2267   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2268   rowners[0] = 0;
2269   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2270   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2271   rstart = rowners[rank];
2272   rend   = rowners[rank+1];
2273 
2274   /* distribute row lengths to all processors */
2275   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2276   if (!rank) {
2277     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2278     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2279     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2280     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2281     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2282     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2283     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2284   } else {
2285     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2286   }
2287 
2288   if (!rank) {   /* procs[0] */
2289     /* calculate the number of nonzeros on each processor */
2290     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2291     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2292     for (i=0; i<size; i++) {
2293       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2294         procsnz[i] += rowlengths[j];
2295       }
2296     }
2297     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2298 
2299     /* determine max buffer needed and allocate it */
2300     maxnz = 0;
2301     for (i=0; i<size; i++) {
2302       maxnz = PetscMax(maxnz,procsnz[i]);
2303     }
2304     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2305 
2306     /* read in my part of the matrix column indices  */
2307     nz     = procsnz[0];
2308     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2309     mycols = ibuf;
2310     if (size == 1)  nz -= extra_rows;
2311     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2312     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2313 
2314     /* read in the other processors' parts (except the last) and ship them off */
2315     for (i=1; i<size-1; i++) {
2316       nz   = procsnz[i];
2317       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2318       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2319     }
2320     /* read in the stuff for the last proc */
2321     if (size != 1) {
2322       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2323       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2324       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2325       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2326     }
2327     ierr = PetscFree(cols);CHKERRQ(ierr);
2328   } else {  /* procs[i], i>0 */
2329     /* determine buffer space needed for message */
2330     nz = 0;
2331     for (i=0; i<m; i++) {
2332       nz += locrowlens[i];
2333     }
2334     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2335     mycols = ibuf;
2336     /* receive message of column indices*/
2337     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2338     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2339     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2340   }
2341 
2342   /* loop over local rows, determining number of off diagonal entries */
2343   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2344   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2345   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2346   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2347   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2348   rowcount = 0;
2349   nzcount  = 0;
2350   for (i=0; i<mbs; i++) {
2351     dcount  = 0;
2352     odcount = 0;
2353     for (j=0; j<bs; j++) {
2354       kmax = locrowlens[rowcount];
2355       for (k=0; k<kmax; k++) {
2356         tmp = mycols[nzcount++]/bs; /* block col. index */
2357         if (!mask[tmp]) {
2358           mask[tmp] = 1;
2359           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2360           else masked1[dcount++] = tmp; /* entry in diag portion */
2361         }
2362       }
2363       rowcount++;
2364     }
2365 
2366     dlens[i]  = dcount;  /* d_nzz[i] */
2367     odlens[i] = odcount; /* o_nzz[i] */
2368 
2369     /* zero out the mask elements we set */
2370     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2371     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2372   }
2373   if (!sizesset) {
2374     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2375   }
2376   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2377   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2378 
2379   if (!rank) {
2380     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2381     /* read in my part of the matrix numerical values  */
2382     nz = procsnz[0];
2383     vals = buf;
2384     mycols = ibuf;
2385     if (size == 1)  nz -= extra_rows;
2386     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2387     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2388 
2389     /* insert into matrix */
2390     jj      = rstart*bs;
2391     for (i=0; i<m; i++) {
2392       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2393       mycols += locrowlens[i];
2394       vals   += locrowlens[i];
2395       jj++;
2396     }
2397 
2398     /* read in other processors (except the last one) and ship out */
2399     for (i=1; i<size-1; i++) {
2400       nz   = procsnz[i];
2401       vals = buf;
2402       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2403       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2404     }
2405     /* the last proc */
2406     if (size != 1) {
2407       nz   = procsnz[size-1] - extra_rows;
2408       vals = buf;
2409       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2410       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2411       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2412     }
2413     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2414 
2415   } else {
2416     /* receive numeric values */
2417     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2418 
2419     /* receive message of values*/
2420     vals   = buf;
2421     mycols = ibuf;
2422     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2423     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2424     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2425 
2426     /* insert into matrix */
2427     jj      = rstart*bs;
2428     for (i=0; i<m; i++) {
2429       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2430       mycols += locrowlens[i];
2431       vals   += locrowlens[i];
2432       jj++;
2433     }
2434   }
2435 
2436   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2437   ierr = PetscFree(buf);CHKERRQ(ierr);
2438   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2439   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2440   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2441   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2442   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2443   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2444   PetscFunctionReturn(0);
2445 }
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2449 /*XXXXX@
2450    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2451 
2452    Input Parameters:
2453 .  mat  - the matrix
2454 .  fact - factor
2455 
2456    Not Collective on Mat, each process can have a different hash factor
2457 
2458    Level: advanced
2459 
2460   Notes:
2461    This can also be set by the command line option: -mat_use_hash_table fact
2462 
2463 .keywords: matrix, hashtable, factor, HT
2464 
2465 .seealso: MatSetOption()
2466 @XXXXX*/
2467 
2468 
2469 #undef __FUNCT__
2470 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2471 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2472 {
2473   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2474   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2475   PetscReal      atmp;
2476   PetscReal      *work,*svalues,*rvalues;
2477   PetscErrorCode ierr;
2478   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2479   PetscMPIInt    rank,size;
2480   PetscInt       *rowners_bs,dest,count,source;
2481   PetscScalar    *va;
2482   MatScalar      *ba;
2483   MPI_Status     stat;
2484 
2485   PetscFunctionBegin;
2486   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2487   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2488   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2489 
2490   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2491   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2492 
2493   bs   = A->rmap->bs;
2494   mbs  = a->mbs;
2495   Mbs  = a->Mbs;
2496   ba   = b->a;
2497   bi   = b->i;
2498   bj   = b->j;
2499 
2500   /* find ownerships */
2501   rowners_bs = A->rmap->range;
2502 
2503   /* each proc creates an array to be distributed */
2504   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2505   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2506 
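  /* each block of the strictly upper off-diagonal part also bounds a row owned by the
     process that owns its column; those column maxima are accumulated in work[] and
     shipped to the higher-ranked owners below */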
2507   /* row_max for B */
2508   if (rank != size-1) {
2509     for (i=0; i<mbs; i++) {
2510       ncols = bi[1] - bi[0]; bi++;
2511       brow  = bs*i;
2512       for (j=0; j<ncols; j++) {
2513         bcol = bs*(*bj);
2514         for (kcol=0; kcol<bs; kcol++) {
2515           col = bcol + kcol;                 /* local col index */
2516           col += rowners_bs[rank+1];      /* global col index */
2517           for (krow=0; krow<bs; krow++) {
2518             atmp = PetscAbsScalar(*ba); ba++;
2519             row = brow + krow;    /* local row index */
2520             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2521             if (work[col] < atmp) work[col] = atmp;
2522           }
2523         }
2524         bj++;
2525       }
2526     }
2527 
2528     /* send values to its owners */
2529     for (dest=rank+1; dest<size; dest++) {
2530       svalues = work + rowners_bs[dest];
2531       count   = rowners_bs[dest+1]-rowners_bs[dest];
2532       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2533     }
2534   }
2535 
2536   /* receive values */
2537   if (rank) {
2538     rvalues = work;
2539     count   = rowners_bs[rank+1]-rowners_bs[rank];
2540     for (source=0; source<rank; source++) {
2541       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2542       /* process values */
2543       for (i=0; i<count; i++) {
2544         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2545       }
2546     }
2547   }
2548 
2549   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2550   ierr = PetscFree(work);CHKERRQ(ierr);
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatSOR_MPISBAIJ"
2556 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2557 {
2558   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2559   PetscErrorCode    ierr;
2560   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2561   PetscScalar       *x,*ptr,*from;
2562   Vec               bb1;
2563   const PetscScalar *b;
2564 
2565   PetscFunctionBegin;
2566   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2567   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2568 
2569   if (flag == SOR_APPLY_UPPER) {
2570     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2571     PetscFunctionReturn(0);
2572   }
2573 
2574   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2575     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2576       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2577       its--;
2578     }
2579 
2580     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
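    /* each iteration assembles bb1 = bb - (upper off-diagonal)*xx - (lower off-diagonal)*xx
       through a single scatter of slvec0 into slvec1, then runs a local symmetric sweep */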
2581     while (its--) {
2582 
2583       /* lower triangular part: slvec0b = - B^T*xx */
2584       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2585 
2586       /* copy xx into slvec0a */
2587       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2588       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2589       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2590       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2591 
2592       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2593 
2594       /* copy bb into slvec1a */
2595       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2596       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2597       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2598       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2599 
2600       /* set slvec1b = 0 */
2601       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2602 
2603       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2604       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2605       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2606       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2607 
2608       /* upper triangular part: bb1 = bb1 - B*x */
2609       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2610 
2611       /* local diagonal sweep */
2612       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2613     }
2614     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2615   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2616     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2617   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2618     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2619   } else if (flag & SOR_EISENSTAT) {
2620     Vec               xx1;
2621     PetscBool         hasop;
2622     const PetscScalar *diag;
2623     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2624     PetscInt          i,n;
2625 
2626     if (!mat->xx1) {
2627       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2628       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2629     }
2630     xx1 = mat->xx1;
2631     bb1 = mat->bb1;
2632 
2633     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2634 
2635     if (!mat->diag) {
2636       /* this is wrong for same matrix with new nonzero values */
2637       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2638       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2639     }
2640     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2641 
2642     if (hasop) {
2643       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2644       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2645     } else {
2646       /*
2647           These two lines are replaced by code that may be a bit faster for a good compiler
2648       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2649       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2650       */
2651       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2652       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2653       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2654       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2655       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2656       if (omega == 1.0) {
2657         for (i=0; i<n; i++) {
2658           sl[i] = b[i] - diag[i]*x[i];
2659         }
2660         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2661       } else {
2662         for (i=0; i<n; i++) {
2663           sl[i] = b[i] + scale*diag[i]*x[i];
2664         }
2665         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2666       }
2667       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2668       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2669       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2670       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2671     }
2672 
2673     /* multiply off-diagonal portion of matrix */
2674     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2675     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2676     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2677     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2678     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2679     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2680     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2681     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2682     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2683     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
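    /* slvec1a now holds the complete right-hand side for the forward sweep */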
2684 
2685     /* local sweep */
2686     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
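    /* xx = xx + xx1 combines the backward- and forward-sweep halves of the splitting */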
2687     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2688   } else {
2689     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2690   }
2691   PetscFunctionReturn(0);
2692 }
2693 
2694 #undef __FUNCT__
2695 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2696 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2697 {
2698   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2699   PetscErrorCode ierr;
2700   Vec            lvec1,bb1;
2701 
2702   PetscFunctionBegin;
2703   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2704   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2705 
2706   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2707     if (flag & SOR_ZERO_INITIAL_GUESS) {
2708       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2709       its--;
2710     }
2711 
2712     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2713     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
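    /*
       Two-communication variant of the symmetric sweep: a forward scatter of xx into the
       ghost vector lvec feeds the B multiply, while a reverse scatter of lvec1 = -B^T*xx
       accumulates the lower triangular contributions into bb1; both scatters are
       overlapped with local work.
    */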
2714     while (its--) {
2715       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2716 
2717       /* lower triangular part: bb1 = bb - B^T*xx */
2718       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2719       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2720 
2721       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2722       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2723       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2724 
2725       /* upper triangular part: bb1 = bb1 - B*x */
2726       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2727       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2728 
2729       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2730 
2731       /* diagonal sweep */
2732       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2733     }
2734     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2735     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2736   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2737   PetscFunctionReturn(0);
2738 }
2739 
2740 #undef __FUNCT__
2741 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2742 /*@
2743      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain
2744          the local rows in standard CSR format.
2745 
2746    Collective on MPI_Comm
2747 
2748    Input Parameters:
2749 +  comm - MPI communicator
2750 .  bs - the block size, only a block size of 1 is supported
2751 .  m - number of local rows (cannot be PETSC_DECIDE)
2752 .  n - this value should be the same as the local size used in creating the
2753        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
2754        it calculated if N is given); for square matrices n is almost always m
2755 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2756 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2757 .  i - row indices
2758 .  j - column indices
2759 -  a - matrix values
2760 
2761    Output Parameter:
2762 .  mat - the matrix
2763 
2764    Level: intermediate
2765 
2766    Notes:
2767        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2768      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2769      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2770 
2771        The i and j indices are 0 based; the i array holds the offsets of each local row into the local j array.
2772 
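   Example:
   A minimal sketch (the values are illustrative, not from a PETSc example): with bs = 1
   on a single process, pass the upper triangle (including the diagonal) of a 3x3
   tridiagonal matrix in CSR form.
.vb
      PetscInt    i[] = {0,2,4,5};     /* i[k] is the offset of row k in j[] and a[] */
      PetscInt    j[] = {0,1,1,2,2};   /* column indices of the upper triangular entries */
      PetscScalar a[] = {2,-1,2,-1,2}; /* one value per entry of j[] */
      Mat         A;

      ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve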
2773 .keywords: matrix, aij, compressed row, sparse, parallel
2774 
2775 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2776           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2777 @*/
2778 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2779 {
2780   PetscErrorCode ierr;
2781 
2782   PetscFunctionBegin;
2784   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2787   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2788   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2789   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2790   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2791   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2792   PetscFunctionReturn(0);
2793 }
2794 
2796 #undef __FUNCT__
2797 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2798 /*@C
2799    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format;
2800    only the upper triangular portion of each block row is stored.
2801 
2802    Collective on MPI_Comm
2803 
2804    Input Parameters:
2805 +  B - the matrix
2806 .  bs - the block size
2807 .  i - the indices into j for the start of each local row (starts with zero)
2808 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2809 -  v - optional values in the matrix
2810 
2811    Level: developer
2812 
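   Example:
   A minimal sketch (illustrative values; single process, bs = 1) preallocating and
   filling the same upper triangular structure used in the MatCreateMPISBAIJWithArrays()
   example above:
.vb
      PetscInt    i[] = {0,2,4,5};
      PetscInt    j[] = {0,1,1,2,2};
      PetscScalar v[] = {2,-1,2,-1,2};
      Mat         B;

      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,3,3,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
.ve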
2813 .keywords: matrix, aij, compressed row, sparse, parallel
2814 
2815 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2816 @*/
2817 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2818 {
2819   PetscErrorCode ierr;
2820 
2821   PetscFunctionBegin;
2822   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2823   PetscFunctionReturn(0);
2824 }
2825 
2826 
2827