/* xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d54338ec4fdf48b56b5efdd9bf04ca9d50d6ab04) */
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
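/*
   The two macros below insert a single scalar value into the local diagonal part (A)
   or the local off-diagonal part (B).  In outline: compute the block row and block
   column of the entry, narrow the search range over the (sorted) block columns of
   that row with a short bisection, finish with a linear scan, then either update the
   existing bs x bs block in place or, if new nonzeros are allowed, shift the later
   blocks of the row up by one slot and start a new zeroed block.  Reallocation, when
   needed, is delegated to MatSeqXAIJReallocateAIJ().
*/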
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
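/*
   Example usage (a minimal sketch, not taken from this file: "mat" is assumed to be
   an already created and preallocated MPISBAIJ matrix with block size 1, and the
   indices are illustrative only):

     PetscErrorCode ierr;
     PetscInt       i = 3, j = 5;
     PetscScalar    v = 2.0;

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   The first MatSetValues() call (row <= column) is stored; the second targets the
   lower triangle and is silently dropped because of the option set above.  Without
   MAT_IGNORE_LOWER_TRIANGULAR (or -mat_ignore_lower_triangular) that second call
   raises PETSC_ERR_USER instead.
*/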
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash) {
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format; to ignore such values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j];
200           else             value = v[i+j*m];
201           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
202           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
203         } else if (in[j] < 0) continue;
204 #if defined(PETSC_USE_DEBUG)
205         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
206 #endif
207         else {  /* off-diag entry (B) */
208           if (mat->was_assembled) {
209             if (!baij->colmap) {
210               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
211             }
212 #if defined (PETSC_USE_CTABLE)
213             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
214             col  = col - 1;
215 #else
216             col = baij->colmap[in[j]/bs] - 1;
217 #endif
218             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
219               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
220               col =  in[j];
221               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
222               B = baij->B;
223               b = (Mat_SeqBAIJ*)(B)->data;
224               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
225               ba=b->a;
226             } else col += in[j]%bs;
227           } else col = in[j];
228           if (roworiented) value = v[i*n+j];
229           else             value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
236       if (!baij->donotstash) {
237         mat->assembled = PETSC_FALSE;
238         n_loc = 0;
239         for (j=0; j<n; j++) {
240           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
241           in_loc[n_loc] = in[j];
242           if (roworiented) {
243             v_loc[n_loc] = v[i*n+j];
244           } else {
245             v_loc[n_loc] = v[j*m+i];
246           }
247           n_loc++;
248         }
249         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
250       }
251     }
252   }
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
261   const MatScalar *value;
262   MatScalar       *barray=baij->barray;
263   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
264   PetscErrorCode  ierr;
265   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
266   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
267   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
268 
269   PetscFunctionBegin;
270   if (!barray) {
271     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
272     baij->barray = barray;
273   }
274 
275   if (roworiented) {
276     stepval = (n-1)*bs;
277   } else {
278     stepval = (m-1)*bs;
279   }
280   for (i=0; i<m; i++) {
281     if (im[i] < 0) continue;
282 #if defined(PETSC_USE_DEBUG)
283     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
284 #endif
285     if (im[i] >= rstart && im[i] < rend) {
286       row = im[i] - rstart;
287       for (j=0; j<n; j++) {
288         if (im[i] > in[j]) {
289           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
290           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format; to ignore such values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
291         }
292         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
293         if ((roworiented) && (n == 1)) {
294           barray = (MatScalar*) v + i*bs2;
295         } else if ((!roworiented) && (m == 1)) {
296           barray = (MatScalar*) v + j*bs2;
297         } else { /* Here a copy is required */
298           if (roworiented) {
299             value = v + i*(stepval+bs)*bs + j*bs;
300           } else {
301             value = v + j*(stepval+bs)*bs + i*bs;
302           }
303           for (ii=0; ii<bs; ii++,value+=stepval) {
304             for (jj=0; jj<bs; jj++) {
305               *barray++  = *value++;
306             }
307           }
308           barray -=bs2;
309         }
310 
311         if (in[j] >= cstart && in[j] < cend) {
312           col  = in[j] - cstart;
313           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
314         }
315         else if (in[j] < 0) continue;
316 #if defined(PETSC_USE_DEBUG)
317         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
318 #endif
319         else {
320           if (mat->was_assembled) {
321             if (!baij->colmap) {
322               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
323             }
324 
325 #if defined(PETSC_USE_DEBUG)
326 #if defined (PETSC_USE_CTABLE)
327             { PetscInt data;
328               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
329               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330             }
331 #else
332             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
333 #endif
334 #endif
335 #if defined (PETSC_USE_CTABLE)
336             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
337             col  = (col - 1)/bs;
338 #else
339             col = (baij->colmap[in[j]] - 1)/bs;
340 #endif
341             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
342               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
343               col =  in[j];
344             }
345           }
346           else col = in[j];
347           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
348         }
349       }
350     } else {
351       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
352       if (!baij->donotstash) {
353         if (roworiented) {
354           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         } else {
356           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
357         }
358       }
359     }
360   }
361   PetscFunctionReturn(0);
362 }
363 
364 #undef __FUNCT__
365 #define __FUNCT__ "MatGetValues_MPISBAIJ"
366 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
367 {
368   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
369   PetscErrorCode ierr;
370   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
371   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
372 
373   PetscFunctionBegin;
374   for (i=0; i<m; i++) {
375     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
376     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
377     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
378       row = idxm[i] - bsrstart;
379       for (j=0; j<n; j++) {
380         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
381         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
382         if (idxn[j] >= bscstart && idxn[j] < bscend) {
383           col = idxn[j] - bscstart;
384           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
385         } else {
386           if (!baij->colmap) {
387             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388           }
389 #if defined (PETSC_USE_CTABLE)
390           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
391           data --;
392 #else
393           data = baij->colmap[idxn[j]/bs]-1;
394 #endif
395           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
396           else {
397             col  = data + idxn[j]%bs;
398             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
399           }
400         }
401       }
402     } else {
403       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
404     }
405   }
406  PetscFunctionReturn(0);
407 }
408 
409 #undef __FUNCT__
410 #define __FUNCT__ "MatNorm_MPISBAIJ"
411 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
412 {
413   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
414   PetscErrorCode ierr;
415   PetscReal      sum[2],*lnorm2;
416 
417   PetscFunctionBegin;
418   if (baij->size == 1) {
419     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
420   } else {
421     if (type == NORM_FROBENIUS) {
422       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
423       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
425       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
426       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
427       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
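      /*
         ||mat||_F^2 = sum_p ||A_p||_F^2 + 2*sum_p ||B_p||_F^2: the off-diagonal part B
         stores each block only once although it represents both the (i,j) and (j,i)
         blocks of the symmetric matrix, hence the factor 2; MatNorm() on the SBAIJ
         diagonal block A already accounts for the symmetry within that block.
      */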
428       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
429       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
430     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
431       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
432       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
433       PetscReal    *rsum,*rsum2,vabs;
434       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
435       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
436       MatScalar    *v;
437 
438       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
439       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
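      /*
         For a symmetric matrix the 1-norm equals the infinity norm, so it suffices to
         accumulate global column sums: each stored block contributes its absolute
         values to its own column sum and, because a(i,j) also represents a(j,i), to
         the column sum matching its row; the first stored block of a row of A (its
         diagonal block) is counted only once.
      */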
440       /* Amat */
441       v = amat->a; jj = amat->j;
442       for (brow=0; brow<mbs; brow++) {
443         grow = bs*(rstart + brow);
444         nz = amat->i[brow+1] - amat->i[brow];
445         for (bcol=0; bcol<nz; bcol++) {
446           gcol = bs*(rstart + *jj); jj++;
447           for (col=0; col<bs; col++) {
448             for (row=0; row<bs; row++) {
449               vabs = PetscAbsScalar(*v); v++;
450               rsum[gcol+col] += vabs;
451               /* non-diagonal block */
452               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
453             }
454           }
455         }
456       }
457       /* Bmat */
458       v = bmat->a; jj = bmat->j;
459       for (brow=0; brow<mbs; brow++) {
460         grow = bs*(rstart + brow);
461         nz = bmat->i[brow+1] - bmat->i[brow];
462         for (bcol=0; bcol<nz; bcol++) {
463           gcol = bs*garray[*jj]; jj++;
464           for (col=0; col<bs; col++) {
465             for (row=0; row<bs; row++) {
466               vabs = PetscAbsScalar(*v); v++;
467               rsum[gcol+col] += vabs;
468               rsum[grow+row] += vabs;
469             }
470           }
471         }
472       }
473       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
474       *norm = 0.0;
475       for (col=0; col<mat->cmap->N; col++) {
476         if (rsum2[col] > *norm) *norm = rsum2[col];
477       }
478       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
479     } else {
480       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
481     }
482   }
483   PetscFunctionReturn(0);
484 }
485 
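/*
   Assembly: entries destined for rows owned by other processes were collected in
   mat->stash (scalar entries) and mat->bstash (blocked entries) by MatSetValues()
   and MatSetValuesBlocked().  MatAssemblyBegin() starts the communication of those
   stashes; MatAssemblyEnd() receives them, re-inserts them locally, and then
   assembles the sequential A (diagonal) and B (off-diagonal) parts.
*/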
486 #undef __FUNCT__
487 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
488 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
489 {
490   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
491   PetscErrorCode ierr;
492   PetscInt       nstash,reallocs;
493   InsertMode     addv;
494 
495   PetscFunctionBegin;
496   if (baij->donotstash || mat->nooffprocentries) {
497     PetscFunctionReturn(0);
498   }
499 
500   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
501   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
502   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
503   mat->insertmode = addv; /* in case this processor had no cache */
504 
505   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
506   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
510   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
511   PetscFunctionReturn(0);
512 }
513 
514 #undef __FUNCT__
515 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
516 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
517 {
518   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
519   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
520   PetscErrorCode ierr;
521   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
522   PetscInt       *row,*col;
523   PetscBool      other_disassembled;
524   PetscMPIInt    n;
525   PetscBool      r1,r2,r3;
526   MatScalar      *val;
527   InsertMode     addv = mat->insertmode;
528 
529   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
530   PetscFunctionBegin;
531   if (!baij->donotstash &&  !mat->nooffprocentries) {
532     while (1) {
533       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
534       if (!flg) break;
535 
536       for (i=0; i<n;) {
537         /* Now identify the consecutive vals belonging to the same row */
538         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
539         if (j < n) ncols = j-i;
540         else       ncols = n-i;
541         /* Now assemble all these values with a single function call */
542         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
543         i = j;
544       }
545     }
546     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
547     /* Now process the block-stash. Since the values are stashed column-oriented,
548        temporarily clear the roworiented flags and restore the original flags
549        after MatSetValuesBlocked() */
550     r1 = baij->roworiented;
551     r2 = a->roworiented;
552     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
553     baij->roworiented = PETSC_FALSE;
554     a->roworiented    = PETSC_FALSE;
555     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
556     while (1) {
557       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
558       if (!flg) break;
559 
560       for (i=0; i<n;) {
561         /* Now identify the consecutive vals belonging to the same row */
562         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
563         if (j < n) ncols = j-i;
564         else       ncols = n-i;
565         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
566         i = j;
567       }
568     }
569     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
570     baij->roworiented = r1;
571     a->roworiented    = r2;
572     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
573   }
574 
575   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
576   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
577 
578   /* determine if any processor has disassembled, if so we must
579      also disassemble ourselves, in order that we may reassemble. */
580   /*
581      if nonzero structure of submatrix B cannot change then we know that
582      no processor disassembled thus we can skip this stuff
583   */
584   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
585     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
586     if (mat->was_assembled && !other_disassembled) {
587       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
588     }
589   }
590 
591   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
592     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
593   }
594   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
595   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
596   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
597 
598   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
599   baij->rowvalues = 0;
600 
601   PetscFunctionReturn(0);
602 }
603 
604 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
605 #undef __FUNCT__
606 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
607 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
608 {
609   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
610   PetscErrorCode    ierr;
611   PetscInt          bs = mat->rmap->bs;
612   PetscMPIInt       size = baij->size,rank = baij->rank;
613   PetscBool         iascii,isdraw;
614   PetscViewer       sviewer;
615   PetscViewerFormat format;
616 
617   PetscFunctionBegin;
618   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
619   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
620   if (iascii) {
621     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
622     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
623       MatInfo info;
624       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
625       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
632       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
633       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
634       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
635       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_INFO) {
638       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
639       PetscFunctionReturn(0);
640     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
641       PetscFunctionReturn(0);
642     }
643   }
644 
645   if (isdraw) {
646     PetscDraw  draw;
647     PetscBool  isnull;
648     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
649     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
650   }
651 
652   if (size == 1) {
653     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
654     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
655   } else {
656     /* assemble the entire matrix onto first processor. */
657     Mat          A;
658     Mat_SeqSBAIJ *Aloc;
659     Mat_SeqBAIJ  *Bloc;
660     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
661     MatScalar    *a;
662 
663     /* Should this be the same type as mat? */
664     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
665     if (!rank) {
666       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
667     } else {
668       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
669     }
670     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
671     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
672     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
673     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
674 
675     /* copy over the A part */
676     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
677     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
678     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
679 
680     for (i=0; i<mbs; i++) {
681       rvals[0] = bs*(baij->rstartbs + i);
682       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
683       for (j=ai[i]; j<ai[i+1]; j++) {
684         col = (baij->cstartbs+aj[j])*bs;
685         for (k=0; k<bs; k++) {
686           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
687           col++; a += bs;
688         }
689       }
690     }
691     /* copy over the B part */
692     Bloc = (Mat_SeqBAIJ*)baij->B->data;
693     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
694     for (i=0; i<mbs; i++) {
695 
696       rvals[0] = bs*(baij->rstartbs + i);
697       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
698       for (j=ai[i]; j<ai[i+1]; j++) {
699         col = baij->garray[aj[j]]*bs;
700         for (k=0; k<bs; k++) {
701           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
702           col++; a += bs;
703         }
704       }
705     }
706     ierr = PetscFree(rvals);CHKERRQ(ierr);
707     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
709     /*
710        Everyone has to call MatView() to draw the matrix since the graphics waits are
711        synchronized across all processors that share the PetscDraw object
712     */
713     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
714     if (!rank) {
715       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
716           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
717       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
718       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
719     }
720     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
721     ierr = MatDestroy(&A);CHKERRQ(ierr);
722   }
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatView_MPISBAIJ"
728 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
729 {
730   PetscErrorCode ierr;
731   PetscBool      iascii,isdraw,issocket,isbinary;
732 
733   PetscFunctionBegin;
734   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
738   if (iascii || isdraw || issocket || isbinary) {
739     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
740   } else {
741     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
742   }
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatDestroy_MPISBAIJ"
748 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
749 {
750   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754 #if defined(PETSC_USE_LOG)
755   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
756 #endif
757   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
758   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
760   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
761 #if defined (PETSC_USE_CTABLE)
762   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
763 #else
764   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
765 #endif
766   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
768   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
774   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
775   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
776   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
777   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
781 #if defined(PETSC_USE_REAL_MAT_SINGLE)
782   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
783 #endif
784   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
787   ierr = PetscFree(mat->data);CHKERRQ(ierr);
788 
789   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
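/*
   Matrix-vector product y = M*x when only the upper triangle of M is stored.  In
   outline: slvec1a receives A*x (local diagonal block) and slvec1b is zeroed;
   slvec0 is packed as [x ; B^T*x]; the ADD_VALUES forward scatter over sMvctx both
   deposits the B^T*x contributions into the owning processes' slvec1a and gathers
   the ghost values of x into slvec1b; the final multadd then forms
   y = B*slvec1b + slvec1a.
*/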
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
801 {
802   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
805   PetscScalar    *x,*from;
806 
807   PetscFunctionBegin;
808   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
809   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
810 
811   /* diagonal part */
812   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
813   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
814 
815   /* subdiagonal part */
816   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
817 
818   /* copy x into the vec slvec0 */
819   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
823   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825 
826   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828   /* superdiagonal part */
829   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMult_MPISBAIJ"
835 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
836 {
837   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
838   PetscErrorCode ierr;
839   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
840   PetscScalar    *x,*from;
841 
842   PetscFunctionBegin;
843   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
844   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* superdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
879 
880   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
881   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
882 
883   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   /* do diagonal part */
885   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
886   /* do superdiagonal part */
887   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
889   /* do subdiagonal part */
890   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
891   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
893 
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
899 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
900 {
901   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
902   PetscErrorCode ierr;
903   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
904   PetscScalar    *x,*from,zero=0.0;
905 
906   PetscFunctionBegin;
907   /*
908   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
909   PetscSynchronizedFlush(((PetscObject)A)->comm);
910   */
911   /* diagonal part */
912   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
913   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
914 
915   /* subdiagonal part */
916   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
917 
918   /* copy x into the vec slvec0 */
919   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
920   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
921   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
922   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
923 
924   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
925   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
926   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
927 
928   /* superdiagonal part */
929   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
930 
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
936 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
937 {
938   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   /* do diagonal part */
944   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
945   /* do superdiagonal part */
946   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
948 
949   /* do subdiagonal part */
950   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
951   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
953 
954   PetscFunctionReturn(0);
955 }
956 
957 /*
958   This only works correctly for square matrices where the subblock A->A is the
959    diagonal block
960 */
961 #undef __FUNCT__
962 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
963 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
964 {
965   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
966   PetscErrorCode ierr;
967 
968   PetscFunctionBegin;
969   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
970   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
971   PetscFunctionReturn(0);
972 }
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "MatScale_MPISBAIJ"
976 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
977 {
978   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
979   PetscErrorCode ierr;
980 
981   PetscFunctionBegin;
982   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
983   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
984   PetscFunctionReturn(0);
985 }
986 
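/*
   MatGetRow for the parallel symmetric format: a row of the global matrix is the
   union of the stored row of the local diagonal block (A, local column numbering
   shifted by cstart) and the row of the off-diagonal block (B, columns mapped to
   global numbering through garray); the two pieces are merged in increasing global
   column order into the workspace rowvalues/rowindices.
*/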
987 #undef __FUNCT__
988 #define __FUNCT__ "MatGetRow_MPISBAIJ"
989 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
990 {
991   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
992   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
993   PetscErrorCode ierr;
994   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
995   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
996   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
997 
998   PetscFunctionBegin;
999   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
1000   mat->getrowactive = PETSC_TRUE;
1001 
1002   if (!mat->rowvalues && (idx || v)) {
1003     /*
1004         allocate enough space to hold information from the longest row.
1005     */
1006     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1007     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1008     PetscInt     max = 1,mbs = mat->mbs,tmp;
1009     for (i=0; i<mbs; i++) {
1010       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1011       if (max < tmp) { max = tmp; }
1012     }
1013     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1014   }
1015 
1016   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1017   lrow = row - brstart;  /* local row index */
1018 
1019   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1020   if (!v)   {pvA = 0; pvB = 0;}
1021   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1022   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1023   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1024   nztot = nzA + nzB;
1025 
1026   cmap  = mat->garray;
1027   if (v  || idx) {
1028     if (nztot) {
1029       /* Sort by increasing column numbers, assuming A and B already sorted */
1030       PetscInt imark = -1;
1031       if (v) {
1032         *v = v_p = mat->rowvalues;
1033         for (i=0; i<nzB; i++) {
1034           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1035           else break;
1036         }
1037         imark = i;
1038         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1039         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1040       }
1041       if (idx) {
1042         *idx = idx_p = mat->rowindices;
1043         if (imark > -1) {
1044           for (i=0; i<imark; i++) {
1045             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1046           }
1047         } else {
1048           for (i=0; i<nzB; i++) {
1049             if (cmap[cworkB[i]/bs] < cstart)
1050               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1051             else break;
1052           }
1053           imark = i;
1054         }
1055         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1056         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1057       }
1058     } else {
1059       if (idx) *idx = 0;
1060       if (v)   *v   = 0;
1061     }
1062   }
1063   *nz = nztot;
1064   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1065   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1071 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1072 {
1073   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1074 
1075   PetscFunctionBegin;
1076   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1077   baij->getrowactive = PETSC_FALSE;
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1083 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1084 {
1085   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1086   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1087 
1088   PetscFunctionBegin;
1089   aA->getrow_utriangular = PETSC_TRUE;
1090   PetscFunctionReturn(0);
1091 }
1092 #undef __FUNCT__
1093 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1094 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1095 {
1096   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1097   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1098 
1099   PetscFunctionBegin;
1100   aA->getrow_utriangular = PETSC_FALSE;
1101   PetscFunctionReturn(0);
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1106 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1107 {
1108   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1109   PetscErrorCode ierr;
1110 
1111   PetscFunctionBegin;
1112   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1113   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1119 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1120 {
1121   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1126   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1132 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1133 {
1134   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1135   PetscErrorCode ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1139   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1145 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1146 {
1147   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1148   Mat            A = a->A,B = a->B;
1149   PetscErrorCode ierr;
1150   PetscReal      isend[5],irecv[5];
1151 
1152   PetscFunctionBegin;
1153   info->block_size     = (PetscReal)matin->rmap->bs;
1154   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1155   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1156   isend[3] = info->memory;  isend[4] = info->mallocs;
1157   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1159   isend[3] += info->memory;  isend[4] += info->mallocs;
1160   if (flag == MAT_LOCAL) {
1161     info->nz_used      = isend[0];
1162     info->nz_allocated = isend[1];
1163     info->nz_unneeded  = isend[2];
1164     info->memory       = isend[3];
1165     info->mallocs      = isend[4];
1166   } else if (flag == MAT_GLOBAL_MAX) {
1167     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1168     info->nz_used      = irecv[0];
1169     info->nz_allocated = irecv[1];
1170     info->nz_unneeded  = irecv[2];
1171     info->memory       = irecv[3];
1172     info->mallocs      = irecv[4];
1173   } else if (flag == MAT_GLOBAL_SUM) {
1174     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1175     info->nz_used      = irecv[0];
1176     info->nz_allocated = irecv[1];
1177     info->nz_unneeded  = irecv[2];
1178     info->memory       = irecv[3];
1179     info->mallocs      = irecv[4];
1180   } else {
1181     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1182   }
1183   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1184   info->fill_ratio_needed = 0;
1185   info->factor_mallocs    = 0;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1191 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1192 {
1193   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1194   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   switch (op) {
1199   case MAT_NEW_NONZERO_LOCATIONS:
1200   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1201   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1202   case MAT_KEEP_NONZERO_PATTERN:
1203   case MAT_NEW_NONZERO_LOCATION_ERR:
1204     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1205     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1206     break;
1207   case MAT_ROW_ORIENTED:
1208     a->roworiented = flg;
1209     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1210     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1211     break;
1212   case MAT_NEW_DIAGONALS:
1213     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1214     break;
1215   case MAT_IGNORE_OFF_PROC_ENTRIES:
1216     a->donotstash = flg;
1217     break;
1218   case MAT_USE_HASH_TABLE:
1219     a->ht_flag = flg;
1220     break;
1221   case MAT_HERMITIAN:
1222     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1223     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1224     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1225     break;
1226   case MAT_SPD:
1227     A->spd_set                         = PETSC_TRUE;
1228     A->spd                             = flg;
1229     if (flg) {
1230       A->symmetric                     = PETSC_TRUE;
1231       A->structurally_symmetric        = PETSC_TRUE;
1232       A->symmetric_set                 = PETSC_TRUE;
1233       A->structurally_symmetric_set    = PETSC_TRUE;
1234     }
1235     break;
1236   case MAT_SYMMETRIC:
1237     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1238     break;
1239   case MAT_STRUCTURALLY_SYMMETRIC:
1240     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1241     break;
1242   case MAT_SYMMETRY_ETERNAL:
1243     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1244     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1245     break;
1246   case MAT_IGNORE_LOWER_TRIANGULAR:
1247     aA->ignore_ltriangular = flg;
1248     break;
1249   case MAT_ERROR_LOWER_TRIANGULAR:
1250     aA->ignore_ltriangular = flg;
1251     break;
1252   case MAT_GETROW_UPPERTRIANGULAR:
1253     aA->getrow_utriangular = flg;
1254     break;
1255   default:
1256     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1257   }
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1263 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1264 {
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1269     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1270   }
1271   PetscFunctionReturn(0);
1272 }
1273 
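/*
   Diagonal scaling of a symmetric matrix: only the symmetric scaling D*M*D keeps the
   result symmetric (and hence representable in SBAIJ storage), so the left and right
   scaling vectors must be identical.
*/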
1274 #undef __FUNCT__
1275 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1276 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1277 {
1278   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1279   Mat            a=baij->A, b=baij->B;
1280   PetscErrorCode ierr;
1281   PetscInt       nv,m,n;
1282   PetscBool      flg;
1283 
1284   PetscFunctionBegin;
1285   if (ll != rr) {
1286     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1287     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1288   }
1289   if (!ll) PetscFunctionReturn(0);
1290 
1291   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1292   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1293 
1294   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1295   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1296 
1297   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1298 
1299   /* left diagonalscale the off-diagonal part */
1300   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1301 
1302   /* scale the diagonal part */
1303   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1304 
1305   /* right diagonalscale the off-diagonal part */
1306   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1307   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1313 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1314 {
1315   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1316   PetscErrorCode ierr;
1317 
1318   PetscFunctionBegin;
1319   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1320   PetscFunctionReturn(0);
1321 }
1322 
1323 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1324 
1325 #undef __FUNCT__
1326 #define __FUNCT__ "MatEqual_MPISBAIJ"
1327 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1328 {
1329   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1330   Mat            a,b,c,d;
1331   PetscBool      flg;
1332   PetscErrorCode ierr;
1333 
1334   PetscFunctionBegin;
1335   a = matA->A; b = matA->B;
1336   c = matB->A; d = matB->B;
1337 
1338   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1339   if (flg) {
1340     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1341   }
1342   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1343   PetscFunctionReturn(0);
1344 }
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatCopy_MPISBAIJ"
1348 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1349 {
1350   PetscErrorCode ierr;
1351   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1352   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1353 
1354   PetscFunctionBegin;
1355   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1356   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1357     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1358     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1359     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1360   } else {
1361     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1362     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1363   }
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1369 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1370 {
1371   PetscErrorCode ierr;
1372 
1373   PetscFunctionBegin;
1374   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1380 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1381 {
1382   PetscErrorCode ierr;
1383   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1384   PetscBLASInt   bnz,one=1;
1385   Mat_SeqSBAIJ   *xa,*ya;
1386   Mat_SeqBAIJ    *xb,*yb;
1387 
1388   PetscFunctionBegin;
1389   if (str == SAME_NONZERO_PATTERN) {
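    /* identical nonzero patterns: Y += a*X reduces to a BLAS axpy on the stored
       values of the diagonal (A) and off-diagonal (B) parts */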
1390     PetscScalar alpha = a;
1391     xa = (Mat_SeqSBAIJ *)xx->A->data;
1392     ya = (Mat_SeqSBAIJ *)yy->A->data;
1393     bnz = PetscBLASIntCast(xa->nz);
1394     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1395     xb = (Mat_SeqBAIJ *)xx->B->data;
1396     yb = (Mat_SeqBAIJ *)yy->B->data;
1397     bnz = PetscBLASIntCast(xb->nz);
1398     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1399   } else {
1400     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1401     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1402     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1409 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1410 {
1411   PetscErrorCode ierr;
1412   PetscInt       i;
1413   PetscBool      flg;
1414 
1415   PetscFunctionBegin;
1416   for (i=0; i<n; i++) {
1417     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1418     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1419   }
1420   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 
1425 /* -------------------------------------------------------------------*/
1426 static struct _MatOps MatOps_Values = {
1427        MatSetValues_MPISBAIJ,
1428        MatGetRow_MPISBAIJ,
1429        MatRestoreRow_MPISBAIJ,
1430        MatMult_MPISBAIJ,
1431 /* 4*/ MatMultAdd_MPISBAIJ,
1432        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1433        MatMultAdd_MPISBAIJ,
1434        0,
1435        0,
1436        0,
1437 /*10*/ 0,
1438        0,
1439        0,
1440        MatSOR_MPISBAIJ,
1441        MatTranspose_MPISBAIJ,
1442 /*15*/ MatGetInfo_MPISBAIJ,
1443        MatEqual_MPISBAIJ,
1444        MatGetDiagonal_MPISBAIJ,
1445        MatDiagonalScale_MPISBAIJ,
1446        MatNorm_MPISBAIJ,
1447 /*20*/ MatAssemblyBegin_MPISBAIJ,
1448        MatAssemblyEnd_MPISBAIJ,
1449        MatSetOption_MPISBAIJ,
1450        MatZeroEntries_MPISBAIJ,
1451 /*24*/ 0,
1452        0,
1453        0,
1454        0,
1455        0,
1456 /*29*/ MatSetUp_MPISBAIJ,
1457        0,
1458        0,
1459        0,
1460        0,
1461 /*34*/ MatDuplicate_MPISBAIJ,
1462        0,
1463        0,
1464        0,
1465        0,
1466 /*39*/ MatAXPY_MPISBAIJ,
1467        MatGetSubMatrices_MPISBAIJ,
1468        MatIncreaseOverlap_MPISBAIJ,
1469        MatGetValues_MPISBAIJ,
1470        MatCopy_MPISBAIJ,
1471 /*44*/ 0,
1472        MatScale_MPISBAIJ,
1473        0,
1474        0,
1475        0,
1476 /*49*/ 0,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*54*/ 0,
1482        0,
1483        MatSetUnfactored_MPISBAIJ,
1484        0,
1485        MatSetValuesBlocked_MPISBAIJ,
1486 /*59*/ 0,
1487        0,
1488        0,
1489        0,
1490        0,
1491 /*64*/ 0,
1492        0,
1493        0,
1494        0,
1495        0,
1496 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*74*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*79*/ 0,
1507        0,
1508        0,
1509        0,
1510        MatLoad_MPISBAIJ,
1511 /*84*/ 0,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*89*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*94*/ 0,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*99*/ 0,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*104*/0,
1532        MatRealPart_MPISBAIJ,
1533        MatImaginaryPart_MPISBAIJ,
1534        MatGetRowUpperTriangular_MPISBAIJ,
1535        MatRestoreRowUpperTriangular_MPISBAIJ,
1536 /*109*/0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*114*/0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*119*/0,
1547        0,
1548        0,
1549        0
1550 };
1551 
1552 
1553 EXTERN_C_BEGIN
1554 #undef __FUNCT__
1555 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1556 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1557 {
1558   PetscFunctionBegin;
1559   *a = ((Mat_MPISBAIJ *)A->data)->A;
1560   PetscFunctionReturn(0);
1561 }
1562 EXTERN_C_END
1563 
1564 EXTERN_C_BEGIN
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1567 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1568 {
1569   Mat_MPISBAIJ   *b;
1570   PetscErrorCode ierr;
1571   PetscInt       i,mbs,Mbs;
1572 
1573   PetscFunctionBegin;
1574   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1575   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1576   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1577   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1578 
1579   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1580   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1581   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1582   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1583   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1584 
1585   if (d_nnz) {
1586     for (i=0; i<B->rmap->n/bs; i++) {
1587       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1588     }
1589   }
1590   if (o_nnz) {
1591     for (i=0; i<B->rmap->n/bs; i++) {
1592       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1593     }
1594   }
1595 
1596   b   = (Mat_MPISBAIJ*)B->data;
1597   mbs = B->rmap->n/bs;
1598   Mbs = B->rmap->N/bs;
1599   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1600 
1601   B->rmap->bs  = bs;
1602   b->bs2 = bs*bs;
1603   b->mbs = mbs;
1604   b->nbs = mbs;
1605   b->Mbs = Mbs;
1606   b->Nbs = Mbs;
1607 
1608   for (i=0; i<=b->size; i++) {
1609     b->rangebs[i] = B->rmap->range[i]/bs;
1610   }
1611   b->rstartbs = B->rmap->rstart/bs;
1612   b->rendbs   = B->rmap->rend/bs;
1613 
1614   b->cstartbs = B->cmap->rstart/bs;
1615   b->cendbs   = B->cmap->rend/bs;
1616 
1617   if (!B->preallocated) {
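    /* A stores the diagonal block (upper triangle only, MATSEQSBAIJ); B stores the
       off-diagonal block columns (MATSEQBAIJ) */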
1618     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1619     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1620     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1621     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1622     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1623     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1624     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1625     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1626     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1627   }
1628 
1629   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1630   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1631   B->preallocated = PETSC_TRUE;
1632   PetscFunctionReturn(0);
1633 }
1634 EXTERN_C_END
1635 
1636 EXTERN_C_BEGIN
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1639 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1640 {
1641   PetscInt       m,rstart,cstart,cend;
1642   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1643   const PetscInt *JJ=0;
1644   PetscScalar    *values=0;
1645   PetscErrorCode ierr;
1646 
1647   PetscFunctionBegin;
1648   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1649   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1650   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1651   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1652   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1653   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1654   m      = B->rmap->n/bs;
1655   rstart = B->rmap->rstart/bs;
1656   cstart = B->cmap->rstart/bs;
1657   cend   = B->cmap->rend/bs;
1658 
1659   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1660   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
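  /* for each local block row, count the block columns that fall inside the local
     diagonal block [cstart,cend) (d_nnz) versus outside it (o_nnz) */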
1661   for (i=0; i<m; i++) {
1662     nz = ii[i+1] - ii[i];
1663     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1664     nz_max = PetscMax(nz_max,nz);
1665     JJ  = jj + ii[i];
1666     for (j=0; j<nz; j++) {
1667       if (*JJ >= cstart) break;
1668       JJ++;
1669     }
1670     d = 0;
1671     for (; j<nz; j++) {
1672       if (*JJ++ >= cend) break;
1673       d++;
1674     }
1675     d_nnz[i] = d;
1676     o_nnz[i] = nz - d;
1677   }
1678   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1679   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1680 
1681   values = (PetscScalar*)V;
1682   if (!values) {
1683     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1684     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1685   }
1686   for (i=0; i<m; i++) {
1687     PetscInt          row    = i + rstart;
1688     PetscInt          ncols  = ii[i+1] - ii[i];
1689     const PetscInt    *icols = jj + ii[i];
1690     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1691     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1692   }
1693 
1694   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1695   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 EXTERN_C_END
1701 
1702 EXTERN_C_BEGIN
1703 #if defined(PETSC_HAVE_MUMPS)
1704 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1705 #endif
1706 #if defined(PETSC_HAVE_PASTIX)
1707 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1708 #endif
1709 EXTERN_C_END
1710 
1711 /*MC
1712    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1713    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored: each
1714    process keeps the upper triangle of its "diagonal" block plus the off-diagonal blocks to its right.
1715 
1716   For complex numbers, by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1717   can call MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE) once the matrix has been assembled.
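
  A minimal sketch (A is an MPISBAIJ matrix whose values have already been set; the
  option must be set after assembly):
.vb
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
.ve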
1718 
1719    Options Database Keys:
1720 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1721 
1722   Level: beginner
1723 
1724 .seealso: MatCreateMPISBAIJ
1725 M*/
1726 
1727 EXTERN_C_BEGIN
1728 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1729 EXTERN_C_END
1730 
1731 EXTERN_C_BEGIN
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatCreate_MPISBAIJ"
1734 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1735 {
1736   Mat_MPISBAIJ   *b;
1737   PetscErrorCode ierr;
1738   PetscBool      flg;
1739 
1740   PetscFunctionBegin;
1741   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1742   B->data = (void*)b;
1743   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1744 
1745   B->ops->destroy    = MatDestroy_MPISBAIJ;
1746   B->ops->view       = MatView_MPISBAIJ;
1747   B->assembled       = PETSC_FALSE;
1748 
1749   B->insertmode = NOT_SET_VALUES;
1750   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1751   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1752 
1753   /* build local table of row and column ownerships */
1754   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1755 
1756   /* build cache for off array entries formed */
1757   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1758   b->donotstash  = PETSC_FALSE;
1759   b->colmap      = PETSC_NULL;
1760   b->garray      = PETSC_NULL;
1761   b->roworiented = PETSC_TRUE;
1762 
1763   /* stuff used in block assembly */
1764   b->barray       = 0;
1765 
1766   /* stuff used for matrix vector multiply */
1767   b->lvec         = 0;
1768   b->Mvctx        = 0;
1769   b->slvec0       = 0;
1770   b->slvec0b      = 0;
1771   b->slvec1       = 0;
1772   b->slvec1a      = 0;
1773   b->slvec1b      = 0;
1774   b->sMvctx       = 0;
1775 
1776   /* stuff for MatGetRow() */
1777   b->rowindices   = 0;
1778   b->rowvalues    = 0;
1779   b->getrowactive = PETSC_FALSE;
1780 
1781   /* hash table stuff */
1782   b->ht           = 0;
1783   b->hd           = 0;
1784   b->ht_size      = 0;
1785   b->ht_flag      = PETSC_FALSE;
1786   b->ht_fact      = 0;
1787   b->ht_total_ct  = 0;
1788   b->ht_insert_ct = 0;
1789 
1790   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1791   b->ijonly       = PETSC_FALSE;
1792 
1793   b->in_loc       = 0;
1794   b->v_loc        = 0;
1795   b->n_loc        = 0;
1796   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1797     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1798     if (flg) {
1799       PetscReal fact = 1.39;
1800       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1801       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1802       if (fact <= 1.0) fact = 1.39;
1803       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1804       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1805     }
1806   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1807 
1808 #if defined(PETSC_HAVE_PASTIX)
1809   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1810                                            "MatGetFactor_mpisbaij_pastix",
1811                                            MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1812 #endif
1813 #if defined(PETSC_HAVE_MUMPS)
1814   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1815                                      "MatGetFactor_sbaij_mumps",
1816                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1817 #endif
1818   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1819                                      "MatStoreValues_MPISBAIJ",
1820                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1822                                      "MatRetrieveValues_MPISBAIJ",
1823                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1825                                      "MatGetDiagonalBlock_MPISBAIJ",
1826                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1828                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1829                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1830   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1831                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1832                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1833   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1834                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1835                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1836 
1837   B->symmetric                  = PETSC_TRUE;
1838   B->structurally_symmetric     = PETSC_TRUE;
1839   B->symmetric_set              = PETSC_TRUE;
1840   B->structurally_symmetric_set = PETSC_TRUE;
1841   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1842   PetscFunctionReturn(0);
1843 }
1844 EXTERN_C_END
1845 
1846 /*MC
1847    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1848 
1849    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1850    and MATMPISBAIJ otherwise.
1851 
1852    Options Database Keys:
1853 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1854 
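   A minimal sketch of selecting this type through the options database (comm, m and A
   are illustrative placeholders):
.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);   /* picks up -mat_type sbaij */
.ve
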
1855   Level: beginner
1856 
1857 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1858 M*/
1859 
1860 #undef __FUNCT__
1861 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1862 /*@C
1863    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1864    the user should preallocate the matrix storage by setting the parameters
1865    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1866    performance can be increased by more than a factor of 50.
1867 
1868    Collective on Mat
1869 
1870    Input Parameters:
1871 +  A - the matrix
1872 .  bs   - size of block
1873 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1874            submatrix  (same for all local rows)
1875 .  d_nnz - array containing the number of block nonzeros in the various block rows
1876            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1877            (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1878            for the diagonal entry and set a value even if it is zero.
1879 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1880            submatrix (same for all local rows).
1881 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1882            off-diagonal portion of the local submatrix that is right of the diagonal
1883            (possibly different for each block row) or PETSC_NULL.
1884 
1885 
1886    Options Database Keys:
1887 .   -mat_no_unroll - uses code that does not unroll the loops in the
1888                      block calculations (much slower)
1889 .   -mat_block_size - size of the blocks to use
1890 
1891    Notes:
1892 
1893    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1894    then it must be used on all processors that share the object for that argument.
1895 
1896    If the *_nnz parameter is given then the *_nz parameter is ignored
1897 
1898    Storage Information:
1899    For a square global matrix we define each processor's diagonal portion
1900    to be its local rows and the corresponding columns (a square submatrix);
1901    each processor's off-diagonal portion encompasses the remainder of the
1902    local matrix (a rectangular submatrix).
1903 
1904    The user can specify preallocated storage for the diagonal part of
1905    the local submatrix with either d_nz or d_nnz (not both).  Set
1906    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1907    memory allocation.  Likewise, specify preallocated storage for the
1908    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1909 
1910    You can call MatGetInfo() to get information on how effective the preallocation was;
1911    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1912    You can also run with the option -info and look for messages with the string
1913    malloc in them to see if additional memory allocation was needed.
1914 
1915    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1916    the figure below we depict these three local rows and all columns (0-11).
1917 
1918 .vb
1919            0 1 2 3 4 5 6 7 8 9 10 11
1920           -------------------
1921    row 3  |  . . . d d d o o o o o o
1922    row 4  |  . . . d d d o o o o o o
1923    row 5  |  . . . d d d o o o o o o
1924           -------------------
1925 .ve
1926 
1927    Thus, any entries in the d locations are stored in the d (diagonal)
1928    submatrix, and any entries in the o locations are stored in the
1929    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1930    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1931 
1932    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1933    plus the diagonal part of the d matrix,
1934    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1935 
1936    In general, for PDE problems in which most nonzeros are near the diagonal,
1937    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1938    or you will get TERRIBLE performance; see the users' manual chapter on
1939    matrices.
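
   A minimal usage sketch; comm, m, bs, d_nz and o_nz are illustrative placeholders,
   not values mandated by this routine:
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve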
1940 
1941    Level: intermediate
1942 
1943 .keywords: matrix, block, aij, compressed row, sparse, parallel
1944 
1945 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1946 @*/
1947 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1948 {
1949   PetscErrorCode ierr;
1950 
1951   PetscFunctionBegin;
1952   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1953   PetscValidType(B,1);
1954   PetscValidLogicalCollectiveInt(B,bs,2);
1955   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatCreateSBAIJ"
1961 /*@C
1962    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1963    (block compressed row).  For good matrix assembly performance
1964    the user should preallocate the matrix storage by setting the parameters
1965    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1966    performance can be increased by more than a factor of 50.
1967 
1968    Collective on MPI_Comm
1969 
1970    Input Parameters:
1971 +  comm - MPI communicator
1972 .  bs   - size of block
1973 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1974            This value should be the same as the local size used in creating the
1975            y vector for the matrix-vector product y = Ax.
1976 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1977            This value should be the same as the local size used in creating the
1978            x vector for the matrix-vector product y = Ax.
1979 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1980 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1981 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1982            submatrix  (same for all local rows)
1983 .  d_nnz - array containing the number of block nonzeros in the various block rows
1984            in the upper triangular portion of the diagonal portion of the local submatrix
1985            (possibly different for each block row) or PETSC_NULL.
1986            If you plan to factor the matrix you must leave room for the diagonal entry and
1987            set its value even if it is zero.
1988 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1989            submatrix (same for all local rows).
1990 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1991            off-diagonal portion of the local submatrix (possibly different for
1992            each block row) or PETSC_NULL.
1993 
1994    Output Parameter:
1995 .  A - the matrix
1996 
1997    Options Database Keys:
1998 .   -mat_no_unroll - uses code that does not unroll the loops in the
1999                      block calculations (much slower)
2000 .   -mat_block_size - size of the blocks to use
2001 .   -mat_mpi - use the parallel matrix data structures even on one processor
2002                (defaults to using SeqBAIJ format on one processor)
2003 
2004    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2005    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2006    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2007 
2008    Notes:
2009    The number of rows and columns must be divisible by blocksize.
2010    This matrix type does not support complex Hermitian operations.
2011 
2012    The user MUST specify either the local or global matrix dimensions
2013    (possibly both).
2014 
2015    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2016    then it must be used on all processors that share the object for that argument.
2017 
2018    If the *_nnz parameter is given then the *_nz parameter is ignored
2019 
2020    Storage Information:
2021    For a square global matrix we define each processor's diagonal portion
2022    to be its local rows and the corresponding columns (a square submatrix);
2023    each processor's off-diagonal portion encompasses the remainder of the
2024    local matrix (a rectangular submatrix).
2025 
2026    The user can specify preallocated storage for the diagonal part of
2027    the local submatrix with either d_nz or d_nnz (not both).  Set
2028    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2029    memory allocation.  Likewise, specify preallocated storage for the
2030    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2031 
2032    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2033    the figure below we depict these three local rows and all columns (0-11).
2034 
2035 .vb
2036            0 1 2 3 4 5 6 7 8 9 10 11
2037           -------------------
2038    row 3  |  . . . d d d o o o o o o
2039    row 4  |  . . . d d d o o o o o o
2040    row 5  |  . . . d d d o o o o o o
2041           -------------------
2042 .ve
2043 
2044    Thus, any entries in the d locations are stored in the d (diagonal)
2045    submatrix, and any entries in the o locations are stored in the
2046    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2047    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2048 
2049    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2050    plus the diagonal part of the d matrix,
2051    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2052    In general, for PDE problems in which most nonzeros are near the diagonal,
2053    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2054    or you will get TERRIBLE performance; see the users' manual chapter on
2055    matrices.
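
   A minimal usage sketch; comm, bs, m, d_nz and o_nz are illustrative placeholders:
.vb
   Mat A;
   ierr = MatCreateSBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve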
2056 
2057    Level: intermediate
2058 
2059 .keywords: matrix, block, aij, compressed row, sparse, parallel
2060 
2061 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2062 @*/
2063 
2064 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2065 {
2066   PetscErrorCode ierr;
2067   PetscMPIInt    size;
2068 
2069   PetscFunctionBegin;
2070   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2071   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2072   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2073   if (size > 1) {
2074     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2075     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2076   } else {
2077     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2078     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2079   }
2080   PetscFunctionReturn(0);
2081 }
2082 
2083 
2084 #undef __FUNCT__
2085 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2086 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2087 {
2088   Mat            mat;
2089   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2090   PetscErrorCode ierr;
2091   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2092   PetscScalar    *array;
2093 
2094   PetscFunctionBegin;
2095   *newmat       = 0;
2096   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2097   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2098   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2099   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2100   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2101   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2102 
2103   mat->factortype   = matin->factortype;
2104   mat->preallocated = PETSC_TRUE;
2105   mat->assembled    = PETSC_TRUE;
2106   mat->insertmode   = NOT_SET_VALUES;
2107 
2108   a = (Mat_MPISBAIJ*)mat->data;
2109   a->bs2   = oldmat->bs2;
2110   a->mbs   = oldmat->mbs;
2111   a->nbs   = oldmat->nbs;
2112   a->Mbs   = oldmat->Mbs;
2113   a->Nbs   = oldmat->Nbs;
2114 
2115 
2116   a->size         = oldmat->size;
2117   a->rank         = oldmat->rank;
2118   a->donotstash   = oldmat->donotstash;
2119   a->roworiented  = oldmat->roworiented;
2120   a->rowindices   = 0;
2121   a->rowvalues    = 0;
2122   a->getrowactive = PETSC_FALSE;
2123   a->barray       = 0;
2124   a->rstartbs    = oldmat->rstartbs;
2125   a->rendbs      = oldmat->rendbs;
2126   a->cstartbs    = oldmat->cstartbs;
2127   a->cendbs      = oldmat->cendbs;
2128 
2129   /* hash table stuff */
2130   a->ht           = 0;
2131   a->hd           = 0;
2132   a->ht_size      = 0;
2133   a->ht_flag      = oldmat->ht_flag;
2134   a->ht_fact      = oldmat->ht_fact;
2135   a->ht_total_ct  = 0;
2136   a->ht_insert_ct = 0;
2137 
2138   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2139   if (oldmat->colmap) {
2140 #if defined (PETSC_USE_CTABLE)
2141     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2142 #else
2143     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2144     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2145     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2146 #endif
2147   } else a->colmap = 0;
2148 
2149   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2150     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2151     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2152     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2153   } else a->garray = 0;
2154 
2155   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2156   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2158   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2159   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2160 
2161   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2162   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2163   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2164   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2165 
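  /* slvec0/slvec1 hold the local part (length bs*mbs) followed by the ghost part;
     recreate the sequential views slvec1a, slvec1b and slvec0b over the new arrays */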
2166   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2167   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2168   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2169   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2170   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2171   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2172   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2173   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2174   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2175   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2176   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2177   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2178   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2179 
2180   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2181   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2182   a->sMvctx = oldmat->sMvctx;
2183   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2184 
2185   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2186   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2187   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2188   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2189   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2190   *newmat = mat;
2191   PetscFunctionReturn(0);
2192 }
2193 
2194 #undef __FUNCT__
2195 #define __FUNCT__ "MatLoad_MPISBAIJ"
2196 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2197 {
2198   PetscErrorCode ierr;
2199   PetscInt       i,nz,j,rstart,rend;
2200   PetscScalar    *vals,*buf;
2201   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2202   MPI_Status     status;
2203   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2204   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2205   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2206   PetscInt       bs=1,Mbs,mbs,extra_rows;
2207   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2208   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2209   int            fd;
2210 
2211   PetscFunctionBegin;
2212   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2213     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2214   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2215 
2216   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2217   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2218   if (!rank) {
2219     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2220     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2221     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2222     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2223   }
2224 
2225   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2226 
2227   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2228   M = header[1]; N = header[2];
2229 
2230   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2231   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2232   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2233 
2234   /* If global sizes are set, check if they are consistent with that given in the file */
2235   if (sizesset) {
2236     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2237   }
2238   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2239   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2240 
2241   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2242 
2243   /*
2244      This code adds extra rows to make sure the number of rows is
2245      divisible by the blocksize
2246   */
2247   Mbs        = M/bs;
2248   extra_rows = bs - M + bs*(Mbs);
2249   if (extra_rows == bs) extra_rows = 0;
2250   else                  Mbs++;
2251   if (extra_rows && !rank) {
2252     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2253   }
2254 
2255   /* determine ownership of all rows */
2256   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2257     mbs        = Mbs/size + ((Mbs % size) > rank);
2258     m          = mbs*bs;
2259   } else { /* User Set */
2260     m          = newmat->rmap->n;
2261     mbs        = m/bs;
2262   }
2263   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2264   mmbs       = PetscMPIIntCast(mbs);
2265   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2266   rowners[0] = 0;
2267   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2268   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2269   rstart = rowners[rank];
2270   rend   = rowners[rank+1];
2271 
2272   /* distribute row lengths to all processors */
2273   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2274   if (!rank) {
2275     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2276     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2277     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2278     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2279     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2280     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2281     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2282   } else {
2283     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2284   }
2285 
2286   if (!rank) {   /* procs[0] */
2287     /* calculate the number of nonzeros on each processor */
2288     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2289     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2290     for (i=0; i<size; i++) {
2291       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2292         procsnz[i] += rowlengths[j];
2293       }
2294     }
2295     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2296 
2297     /* determine max buffer needed and allocate it */
2298     maxnz = 0;
2299     for (i=0; i<size; i++) {
2300       maxnz = PetscMax(maxnz,procsnz[i]);
2301     }
2302     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2303 
2304     /* read in my part of the matrix column indices  */
2305     nz     = procsnz[0];
2306     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2307     mycols = ibuf;
2308     if (size == 1)  nz -= extra_rows;
2309     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2310     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2311 
2312     /* read in every ones (except the last) and ship off */
2313     for (i=1; i<size-1; i++) {
2314       nz   = procsnz[i];
2315       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2316       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2317     }
2318     /* read in the stuff for the last proc */
2319     if (size != 1) {
2320       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2321       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2322       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2323       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2324     }
2325     ierr = PetscFree(cols);CHKERRQ(ierr);
2326   } else {  /* procs[i], i>0 */
2327     /* determine buffer space needed for message */
2328     nz = 0;
2329     for (i=0; i<m; i++) {
2330       nz += locrowlens[i];
2331     }
2332     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2333     mycols = ibuf;
2334     /* receive message of column indices*/
2335     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2336     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2337     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2338   }
2339 
2340   /* loop over local rows, determining number of off diagonal entries */
2341   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2342   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2343   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2344   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2345   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2346   rowcount = 0;
2347   nzcount  = 0;
2348   for (i=0; i<mbs; i++) {
2349     dcount  = 0;
2350     odcount = 0;
2351     for (j=0; j<bs; j++) {
2352       kmax = locrowlens[rowcount];
2353       for (k=0; k<kmax; k++) {
2354         tmp = mycols[nzcount++]/bs; /* block col. index */
2355         if (!mask[tmp]) {
2356           mask[tmp] = 1;
2357           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2358           else masked1[dcount++] = tmp; /* entry in diag portion */
2359         }
2360       }
2361       rowcount++;
2362     }
2363 
2364     dlens[i]  = dcount;  /* d_nzz[i] */
2365     odlens[i] = odcount; /* o_nzz[i] */
2366 
2367     /* zero out the mask elements we set */
2368     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2369     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2370   }
2371   if (!sizesset) {
2372     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2373   }
2374   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2375   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2376 
2377   if (!rank) {
2378     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2379     /* read in my part of the matrix numerical values  */
2380     nz = procsnz[0];
2381     vals = buf;
2382     mycols = ibuf;
2383     if (size == 1)  nz -= extra_rows;
2384     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2385     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2386 
2387     /* insert into matrix */
2388     jj      = rstart*bs;
2389     for (i=0; i<m; i++) {
2390       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2391       mycols += locrowlens[i];
2392       vals   += locrowlens[i];
2393       jj++;
2394     }
2395 
2396     /* read in other processors (except the last one) and ship out */
2397     for (i=1; i<size-1; i++) {
2398       nz   = procsnz[i];
2399       vals = buf;
2400       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2401       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2402     }
2403     /* the last proc */
2404     if (size != 1) {
2405       nz   = procsnz[i] - extra_rows;
2406       vals = buf;
2407       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2408       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2409       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2410     }
2411     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2412 
2413   } else {
2414     /* receive numeric values */
2415     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2416 
2417     /* receive message of values*/
2418     vals   = buf;
2419     mycols = ibuf;
2420     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2421     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2422     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2423 
2424     /* insert into matrix */
2425     jj      = rstart*bs;
2426     for (i=0; i<m; i++) {
2427       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2428       mycols += locrowlens[i];
2429       vals   += locrowlens[i];
2430       jj++;
2431     }
2432   }
2433 
2434   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2435   ierr = PetscFree(buf);CHKERRQ(ierr);
2436   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2437   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2438   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2439   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2440   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2441   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2442   PetscFunctionReturn(0);
2443 }
2444 
2445 #undef __FUNCT__
2446 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2447 /*XXXXX@
2448    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2449 
2450    Input Parameters:
2451 .  mat  - the matrix
2452 .  fact - factor
2453 
2454    Not Collective on Mat, each process can have a different hash factor
2455 
2456    Level: advanced
2457 
2458   Notes:
2459    This can also be set by the command line option: -mat_use_hash_table fact
2460 
2461 .keywords: matrix, hashtable, factor, HT
2462 
2463 .seealso: MatSetOption()
2464 @XXXXX*/
2465 
2466 
2467 #undef __FUNCT__
2468 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2469 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2470 {
2471   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2472   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2473   PetscReal      atmp;
2474   PetscReal      *work,*svalues,*rvalues;
2475   PetscErrorCode ierr;
2476   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2477   PetscMPIInt    rank,size;
2478   PetscInt       *rowners_bs,dest,count,source;
2479   PetscScalar    *va;
2480   MatScalar      *ba;
2481   MPI_Status     stat;
2482 
2483   PetscFunctionBegin;
2484   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2485   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2486   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2487 
2488   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2489   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2490 
2491   bs   = A->rmap->bs;
2492   mbs  = a->mbs;
2493   Mbs  = a->Mbs;
2494   ba   = b->a;
2495   bi   = b->i;
2496   bj   = b->j;
2497 
2498   /* find ownerships */
2499   rowners_bs = A->rmap->range;
2500 
2501   /* each proc creates an array to be distributed */
2502   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2503   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2504 
2505   /* row_max for B */
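  /* only the upper triangle is stored, so each entry of B also bounds a row owned by
     a later process; accumulate those contributions in work[] and send each segment
     to its owner below */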
2506   if (rank != size-1) {
2507     for (i=0; i<mbs; i++) {
2508       ncols = bi[1] - bi[0]; bi++;
2509       brow  = bs*i;
2510       for (j=0; j<ncols; j++) {
2511         bcol = bs*(*bj);
2512         for (kcol=0; kcol<bs; kcol++) {
2513           col = bcol + kcol;                 /* local col index */
2514           col += rowners_bs[rank+1];      /* global col index */
2515           for (krow=0; krow<bs; krow++) {
2516             atmp = PetscAbsScalar(*ba); ba++;
2517             row = brow + krow;    /* local row index */
2518             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2519             if (work[col] < atmp) work[col] = atmp;
2520           }
2521         }
2522         bj++;
2523       }
2524     }
2525 
2526     /* send values to its owners */
2527     for (dest=rank+1; dest<size; dest++) {
2528       svalues = work + rowners_bs[dest];
2529       count   = rowners_bs[dest+1]-rowners_bs[dest];
2530       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2531     }
2532   }
2533 
2534   /* receive values */
2535   if (rank) {
2536     rvalues = work;
2537     count   = rowners_bs[rank+1]-rowners_bs[rank];
2538     for (source=0; source<rank; source++) {
2539       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2540       /* process values */
2541       for (i=0; i<count; i++) {
2542         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2543       }
2544     }
2545   }
2546 
2547   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2548   ierr = PetscFree(work);CHKERRQ(ierr);
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 #undef __FUNCT__
2553 #define __FUNCT__ "MatSOR_MPISBAIJ"
2554 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2555 {
2556   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2557   PetscErrorCode    ierr;
2558   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2559   PetscScalar       *x,*ptr,*from;
2560   Vec               bb1;
2561   const PetscScalar *b;
2562 
2563   PetscFunctionBegin;
2564   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2565   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2566 
2567   if (flag == SOR_APPLY_UPPER) {
2568     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2569     PetscFunctionReturn(0);
2570   }
2571 
2572   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2573     if (flag & SOR_ZERO_INITIAL_GUESS) {
2574       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2575       its--;
2576     }
2577 
2578     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2579     while (its--) {
2580 
2581       /* lower triangular part: slvec0b = - B^T*xx */
2582       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2583 
2584       /* copy xx into slvec0a */
2585       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2586       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2587       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2588       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2589 
2590       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2591 
2592       /* copy bb into slvec1a */
2593       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2594       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2595       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2596       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2597 
2598       /* set slvec1b = 0 */
2599       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2600 
2601       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2602       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2603       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2604       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2605 
2606       /* upper triangular part: bb1 = bb1 - B*x */
2607       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2608 
2609       /* local diagonal sweep */
2610       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2611     }
2612     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2613   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2614     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2615   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2616     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2617   } else if (flag & SOR_EISENSTAT) {
2618     Vec               xx1;
2619     PetscBool         hasop;
2620     const PetscScalar *diag;
2621     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2622     PetscInt          i,n;
2623 
2624     if (!mat->xx1) {
2625       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2626       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2627     }
2628     xx1 = mat->xx1;
2629     bb1 = mat->bb1;
2630 
2631     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2632 
2633     if (!mat->diag) {
2634       /* this is wrong for same matrix with new nonzero values */
2635       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2636       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2637     }
2638     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2639 
2640     if (hasop) {
2641       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2642       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2643     } else {
2644       /*
2645           These two lines are replaced by code that may be a bit faster for a good compiler
2646       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2647       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2648       */
2649       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2650       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2651       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2652       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2653       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2654       if (omega == 1.0) {
2655         for (i=0; i<n; i++) {
2656           sl[i] = b[i] - diag[i]*x[i];
2657         }
2658         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2659       } else {
2660         for (i=0; i<n; i++) {
2661           sl[i] = b[i] + scale*diag[i]*x[i];
2662         }
2663         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2664       }
2665       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2666       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2667       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2668       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2669     }
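    /* A worked reading of the loop above (explanatory sketch): with scale = (omega-2.0)/omega it
       computes slvec1a = bb + scale*D*xx elementwise, which is algebraically the same as the
       commented-out VecPointwiseMult()/VecAYPX() pair; for omega == 1.0 this reduces to
       slvec1a = bb - D*xx, saving one multiplication per entry. */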
2670 
2671     /* multiply off-diagonal portion of matrix */
2672     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2673     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2674     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2675     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2676     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2677     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2678     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2679     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2680     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2681     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2682 
2683     /* local sweep */
2684     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2685     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2686   } else {
2687     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2688   }
2689   PetscFunctionReturn(0);
2690 }
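
/*
   Reading aid for the symmetric local sweeps in MatSOR_MPISBAIJ() above (an explanatory
   sketch, not an authoritative restatement of the implementation): the locally owned rows
   of the symmetric matrix couple to the rest of the system through the stored off-diagonal
   block mat->B (columns owned by other processes) and, implicitly, through the transposes
   of the B blocks stored on those other processes.  Each outer iteration therefore assembles
   the modified right-hand side

       bb1 = bb - B*x_ghost - (remote B^T*x contributions)

   using the slvec0/slvec1 work vectors and the sMvctx scatter, and then applies a purely
   local symmetric sweep of the diagonal block mat->A to bb1.
*/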
2691 
2692 #undef __FUNCT__
2693 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2694 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2695 {
2696   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2697   PetscErrorCode ierr;
2698   Vec            lvec1,bb1;
2699 
2700   PetscFunctionBegin;
2701   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2702   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2703 
2704   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2705     if (flag & SOR_ZERO_INITIAL_GUESS) {
2706       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2707       its--;
2708     }
2709 
2710     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2711     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2712     while (its--) {
2713       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2714 
2715       /* lower triangular part: bb1 = bb - B^T*xx */
2716       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2717       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2718 
2719       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2720       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2721       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2722 
2723       /* upper triangular part: bb1 = bb1 - B*x */
2724       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2725       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2726 
2727       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2728 
2729       /* diagonal sweep */
2730       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2731     }
2732     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2733     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2734   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2735   PetscFunctionReturn(0);
2736 }
2737 
2738 #undef __FUNCT__
2739 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2740 /*@
2741      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2742          in standard CSR format.
2743 
2744    Collective on MPI_Comm
2745 
2746    Input Parameters:
2747 +  comm - MPI communicator
2748 .  bs - the block size, only a block size of 1 is supported
2749 .  m - number of local rows (cannot be PETSC_DECIDE)
2750 .  n - number of local columns; this should be the same as the local size used in creating the
2751        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2752        calculated if N is given). For square matrices n is almost always m.
2753 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2754 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2755 .   i - row indices
2756 .   j - column indices
2757 -   a - matrix values
2758 
2759    Output Parameter:
2760 .   mat - the matrix
2761 
2762    Level: intermediate
2763 
2764    Notes:
2765        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2766      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2767      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2768 
2769        The i and j indices are 0 based; the i array gives, for each local row, the starting offset into the local j (and a) arrays.
2770 
2771 .keywords: matrix, sbaij, compressed row, sparse, parallel
2772 
2773 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2774           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2775 @*/
2776 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2777 {
2778   PetscErrorCode ierr;
2779 
2780 
2781   PetscFunctionBegin;
2782   if (i[0]) {
2783     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2784   }
2785   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2786   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2787   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2788   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2789   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2790   PetscFunctionReturn(0);
2791 }
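
/*
   Illustrative usage sketch for MatCreateMPISBAIJWithArrays() (a hypothetical caller, not
   part of this file): a 3x3 symmetric tridiagonal matrix with bs = 1 on a single process.
   The CSR arrays below are invented for the example and list, for each row, only the
   diagonal and upper triangular entries, the layout assumed here because SBAIJ stores
   just the upper triangular part of the symmetric matrix.

     PetscErrorCode ierr;
     Mat            A;
     PetscInt       ii[] = {0,2,4,5};
     PetscInt       jj[] = {0,1, 1,2, 2};
     PetscScalar    vv[] = {2.0,-1.0, 2.0,-1.0, 2.0};

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,vv,&A);CHKERRQ(ierr);
     ierr = MatDestroy(&A);CHKERRQ(ierr);
*/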
2792 
2793 
2794 #undef __FUNCT__
2795 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2796 /*@C
2797    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2798    (symmetric block compressed row storage, which keeps only the upper triangular blocks), given a CSR description of the local rows.
2799 
2800    Collective on MPI_Comm
2801 
2802    Input Parameters:
2803 +  A - the matrix
2804 .  bs - the block size
2805 .  i - the indices into j for the start of each local row (starts with zero)
2806 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2807 -  v - optional values in the matrix
2808 
2809    Level: developer
2810 
2811 .keywords: matrix, sbaij, compressed row, sparse, parallel
2812 
2813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2814 @*/
2815 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2816 {
2817   PetscErrorCode ierr;
2818 
2819   PetscFunctionBegin;
2820   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2821   PetscFunctionReturn(0);
2822 }
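
/*
   Hypothetical usage sketch for MatMPISBAIJSetPreallocationCSR(), mirroring the sequence
   performed by MatCreateMPISBAIJWithArrays() above; comm, bs, m, n and the i/j/v arrays
   are placeholders supplied by the caller.

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
*/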
2823 
2824 
2825