xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 5c8f6a953e7ed1c81f507d64aebddb11080b60e9)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
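
/*
   Usage sketch (kept out of compilation): the store/retrieve pair above saves and
   restores the numerical values of an assembled matrix with a fixed nonzero
   pattern. The PETSc calls are real; the driver function itself is only an
   assumed illustration.
*/
#if 0
static PetscErrorCode ExampleStoreRetrieve(Mat mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* MatStoreValues() requires that no new nonzero locations be created */
  ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(mat);CHKERRQ(ierr);    /* dispatches to MatStoreValues_MPISBAIJ() */
  /* ... overwrite values in place, keeping the same nonzero pattern ... */
  ierr = MatRetrieveValues(mat);CHKERRQ(ierr); /* restores the stored values of A and B */
  PetscFunctionReturn(0);
}
#endif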
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
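
/*
   The two macros above locate block column bcol within a sorted row of block
   columns: a short binary search narrows the window [low,high) until at most
   three candidates remain, then a linear scan finds the match or the insertion
   point. A minimal standalone sketch of that search pattern (illustrative only,
   not used by this file):
*/
#if 0
static PetscInt ExampleFindInsertPoint(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {        /* binary phase: halve the window */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {    /* linear phase: at most three comparisons */
    if (rp[i] >= bcol) break;   /* i is the match (rp[i] == bcol) or the insertion point */
  }
  return i;
}
#endif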
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required by the MatSetValues_SeqSBAIJ_A/B_Private() macros */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash) {
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs) {
189           if (a->ignore_ltriangular) {
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j];
200           else             value = v[i+j*m];
201           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
202           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
203         } else if (in[j] < 0) continue;
204 #if defined(PETSC_USE_DEBUG)
205         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
206 #endif
207         else {  /* off-diag entry (B) */
208           if (mat->was_assembled) {
209             if (!baij->colmap) {
210               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
211             }
212 #if defined (PETSC_USE_CTABLE)
213             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
214             col  = col - 1;
215 #else
216             col = baij->colmap[in[j]/bs] - 1;
217 #endif
218             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
219               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
220               col =  in[j];
221               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
222               B = baij->B;
223               b = (Mat_SeqBAIJ*)(B)->data;
224               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
225               ba=b->a;
226             } else col += in[j]%bs;
227           } else col = in[j];
228           if (roworiented) value = v[i*n+j];
229           else             value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
236       if (!baij->donotstash) {
237         mat->assembled = PETSC_FALSE;
238         n_loc = 0;
239         for (j=0; j<n; j++) {
240           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
241           in_loc[n_loc] = in[j];
242           if (roworiented) {
243             v_loc[n_loc] = v[i*n+j];
244           } else {
245             v_loc[n_loc] = v[j*m+i];
246           }
247           n_loc++;
248         }
249         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
250       }
251     }
252   }
253   PetscFunctionReturn(0);
254 }
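
/*
   Caller-side sketch (kept out of compilation): the routine above stores only
   entries whose block row is <= the block column; lower-triangular input either
   errors or is silently dropped, controlled by MAT_IGNORE_LOWER_TRIANGULAR. The
   indices and values below are assumed for illustration and must lie in locally
   owned rows.
*/
#if 0
static PetscErrorCode ExampleSetUpperTriangle(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       i = 0,j = 3;   /* global indices with i <= j */
  PetscScalar    v = 1.0;

  PetscFunctionBegin;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr); /* stored */
  ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr); /* dropped */
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif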
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
261   const MatScalar *value;
262   MatScalar       *barray=baij->barray;
263   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
264   PetscErrorCode  ierr;
265   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
266   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
267   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
268 
269   PetscFunctionBegin;
270   if (!barray) {
271     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
272     baij->barray = barray;
273   }
274 
275   if (roworiented) {
276     stepval = (n-1)*bs;
277   } else {
278     stepval = (m-1)*bs;
279   }
280   for (i=0; i<m; i++) {
281     if (im[i] < 0) continue;
282 #if defined(PETSC_USE_DEBUG)
283     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
284 #endif
285     if (im[i] >= rstart && im[i] < rend) {
286       row = im[i] - rstart;
287       for (j=0; j<n; j++) {
288         if (im[i] > in[j]) {
289           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
290           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
291         }
292         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, v already holds the block contiguously and a copy is not required */
293         if ((roworiented) && (n == 1)) {
294           barray = (MatScalar*) v + i*bs2;
295         } else if ((!roworiented) && (m == 1)) {
296           barray = (MatScalar*) v + j*bs2;
297         } else { /* Here a copy is required */
298           if (roworiented) {
299             value = v + i*(stepval+bs)*bs + j*bs;
300           } else {
301             value = v + j*(stepval+bs)*bs + i*bs;
302           }
303           for (ii=0; ii<bs; ii++,value+=stepval) {
304             for (jj=0; jj<bs; jj++) {
305               *barray++  = *value++;
306             }
307           }
308           barray -=bs2;
309         }
310 
311         if (in[j] >= cstart && in[j] < cend) {
312           col  = in[j] - cstart;
313           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
314         }
315         else if (in[j] < 0) continue;
316 #if defined(PETSC_USE_DEBUG)
317         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
318 #endif
319         else {
320           if (mat->was_assembled) {
321             if (!baij->colmap) {
322               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
323             }
324 
325 #if defined(PETSC_USE_DEBUG)
326 #if defined (PETSC_USE_CTABLE)
327             { PetscInt data;
328               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
329               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330             }
331 #else
332             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
333 #endif
334 #endif
335 #if defined (PETSC_USE_CTABLE)
336             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
337             col  = (col - 1)/bs;
338 #else
339             col = (baij->colmap[in[j]] - 1)/bs;
340 #endif
341             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
342               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
343               col =  in[j];
344             }
345           }
346           else col = in[j];
347           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
348         }
349       }
350     } else {
351       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
352       if (!baij->donotstash) {
353         if (roworiented) {
354           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         } else {
356           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
357         }
358       }
359     }
360   }
361   PetscFunctionReturn(0);
362 }
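
/*
   Blocked-insertion sketch (kept out of compilation): here the indices are block
   indices and v holds one dense bs x bs block, row-oriented by default. The block
   size and values below are assumed for illustration.
*/
#if 0
static PetscErrorCode ExampleSetBlock(Mat mat) /* assumes bs == 2 */
{
  PetscErrorCode ierr;
  PetscInt       brow = 0,bcol = 1;            /* block row <= block column */
  PetscScalar    block[4] = {1.0,2.0,3.0,4.0}; /* the block [1 2; 3 4], row-oriented */

  PetscFunctionBegin;
  ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif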
363 
364 #undef __FUNCT__
365 #define __FUNCT__ "MatGetValues_MPISBAIJ"
366 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
367 {
368   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
369   PetscErrorCode ierr;
370   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
371   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
372 
373   PetscFunctionBegin;
374   for (i=0; i<m; i++) {
375     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
376     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
377     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
378       row = idxm[i] - bsrstart;
379       for (j=0; j<n; j++) {
380         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
381         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
382         if (idxn[j] >= bscstart && idxn[j] < bscend) {
383           col = idxn[j] - bscstart;
384           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
385         } else {
386           if (!baij->colmap) {
387             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388           }
389 #if defined (PETSC_USE_CTABLE)
390           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
391           data --;
392 #else
393           data = baij->colmap[idxn[j]/bs]-1;
394 #endif
395           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
396           else {
397             col  = data + idxn[j]%bs;
398             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
399           }
400         }
401       }
402     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
403   }
404   PetscFunctionReturn(0);
405 }
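
/*
   Retrieval sketch (kept out of compilation): MatGetValues() on this format only
   serves locally owned rows (see the SETERRQ above); off-process columns are
   resolved through colmap/garray, and columns not found in that map come back as
   0.0. The driver function is an assumed illustration.
*/
#if 0
static PetscErrorCode ExampleGetLocalValue(Mat mat)
{
  PetscErrorCode ierr;
  PetscInt       rstart,rend,row,col;
  PetscScalar    v;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  row  = rstart; /* must be a locally owned row */
  col  = rstart; /* any global column is allowed */
  ierr = MatGetValues(mat,1,&row,1,&col,&v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif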
406 
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
426       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++) {
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++) {
446             for (row=0; row<bs; row++) {
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* non-diagonal block */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++) {
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++) {
463             for (row=0; row<bs; row++) {
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
478   }
479   PetscFunctionReturn(0);
480 }
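
/*
   Note on the NORM_FROBENIUS branch above: each process contributes the squared
   Frobenius norms of its diagonal block A and off-diagonal block B, and the
   reduction computes
       *norm = sqrt( sum_p ||A_p||_F^2 + 2*sum_p ||B_p||_F^2 );
   the factor 2 counts every off-diagonal block entry twice, once for (i,j) and
   once for its unstored transpose (j,i).
*/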
481 
482 #undef __FUNCT__
483 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
484 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
485 {
486   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
487   PetscErrorCode ierr;
488   PetscInt       nstash,reallocs;
489   InsertMode     addv;
490 
491   PetscFunctionBegin;
492   if (baij->donotstash || mat->nooffprocentries) {
493     PetscFunctionReturn(0);
494   }
495 
496   /* make sure all processors are using the same insert mode, INSERT_VALUES or ADD_VALUES */
497   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
498   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
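  /*
     Why MPI_BOR detects the mismatch (enum values as defined in the PETSc
     headers: INSERT_VALUES == 1, ADD_VALUES == 2): if all processes pass the
     same mode the reduction reproduces it, while any mixture yields 1|2 == 3,
     exactly the (ADD_VALUES|INSERT_VALUES) pattern tested above.
  */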
499   mat->insertmode = addv; /* in case this processor had no cache */
500 
501   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
502   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
503   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
504   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
513 {
514   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
515   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
516   PetscErrorCode ierr;
517   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
518   PetscInt       *row,*col;
519   PetscBool      other_disassembled;
520   PetscMPIInt    n;
521   PetscBool      r1,r2,r3;
522   MatScalar      *val;
523   InsertMode     addv = mat->insertmode;
524 
525   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
526   PetscFunctionBegin;
527   if (!baij->donotstash &&  !mat->nooffprocentries) {
528     while (1) {
529       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
530       if (!flg) break;
531 
532       for (i=0; i<n;) {
533         /* Now identify the consecutive vals belonging to the same row */
534         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
535         if (j < n) ncols = j-i;
536         else       ncols = n-i;
537         /* Now assemble all these values with a single function call */
538         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
539         i = j;
540       }
541     }
542     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
543     /* Now process the block-stash. Since the values are stashed column-oriented,
544        temporarily clear the roworiented flags, and restore the original flags
545        after MatSetValues() */
546     r1 = baij->roworiented;
547     r2 = a->roworiented;
548     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
549     baij->roworiented = PETSC_FALSE;
550     a->roworiented    = PETSC_FALSE;
551     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
552     while (1) {
553       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
554       if (!flg) break;
555 
556       for (i=0; i<n;) {
557         /* Now identify the consecutive vals belonging to the same row */
558         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
559         if (j < n) ncols = j-i;
560         else       ncols = n-i;
561         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
562         i = j;
563       }
564     }
565     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
566     baij->roworiented = r1;
567     a->roworiented    = r2;
568     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
569   }
570 
571   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
572   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
573 
574   /* determine if any processor has disassembled, if so we must
575      also disassemble ourselves, in order that we may reassemble. */
576   /*
577      if nonzero structure of submatrix B cannot change then we know that
578      no processor disassembled thus we can skip this stuff
579   */
580   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
581     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
582     if (mat->was_assembled && !other_disassembled) {
583       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
584     }
585   }
586 
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
589   }
590   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
591   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
592   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
593 
594   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
595   baij->rowvalues = 0;
596 
597   PetscFunctionReturn(0);
598 }
599 
600 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
601 #undef __FUNCT__
602 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
603 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
604 {
605   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
606   PetscErrorCode    ierr;
607   PetscInt          bs = mat->rmap->bs;
608   PetscMPIInt       size = baij->size,rank = baij->rank;
609   PetscBool         iascii,isdraw;
610   PetscViewer       sviewer;
611   PetscViewerFormat format;
612 
613   PetscFunctionBegin;
614   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
615   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
616   if (iascii) {
617     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
618     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
619       MatInfo info;
620       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
621       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
622       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
623       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
624       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
628       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
630       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
631       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
632       PetscFunctionReturn(0);
633     } else if (format == PETSC_VIEWER_ASCII_INFO) {
634       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
637       PetscFunctionReturn(0);
638     }
639   }
640 
641   if (isdraw) {
642     PetscDraw  draw;
643     PetscBool  isnull;
644     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
645     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
646   }
647 
648   if (size == 1) {
649     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
650     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
651   } else {
652     /* assemble the entire matrix onto first processor. */
653     Mat          A;
654     Mat_SeqSBAIJ *Aloc;
655     Mat_SeqBAIJ  *Bloc;
656     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
657     MatScalar    *a;
658 
659     /* Should this be the same type as mat? */
660     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
661     if (!rank) {
662       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
663     } else {
664       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
665     }
666     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
667     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
668     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
669     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
670 
671     /* copy over the A part */
672     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
673     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
674     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
675 
676     for (i=0; i<mbs; i++) {
677       rvals[0] = bs*(baij->rstartbs + i);
678       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
679       for (j=ai[i]; j<ai[i+1]; j++) {
680         col = (baij->cstartbs+aj[j])*bs;
681         for (k=0; k<bs; k++) {
682           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
683           col++; a += bs;
684         }
685       }
686     }
687     /* copy over the B part */
688     Bloc = (Mat_SeqBAIJ*)baij->B->data;
689     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
690     for (i=0; i<mbs; i++) {
691 
692       rvals[0] = bs*(baij->rstartbs + i);
693       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
694       for (j=ai[i]; j<ai[i+1]; j++) {
695         col = baij->garray[aj[j]]*bs;
696         for (k=0; k<bs; k++) {
697           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
698           col++; a += bs;
699         }
700       }
701     }
702     ierr = PetscFree(rvals);CHKERRQ(ierr);
703     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     /*
706        Everyone has to call to draw the matrix since the graphics waits are
707        synchronized across all processors that share the PetscDraw object
708     */
709     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII() */
713       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
714       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
715     }
716     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
717     ierr = MatDestroy(&A);CHKERRQ(ierr);
718   }
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "MatView_MPISBAIJ"
724 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
725 {
726   PetscErrorCode ierr;
727   PetscBool      iascii,isdraw,issocket,isbinary;
728 
729   PetscFunctionBegin;
730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
731   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
732   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
733   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
734   if (iascii || isdraw || issocket || isbinary) {
735     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatDestroy_MPISBAIJ"
742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
743 {
744   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748 #if defined(PETSC_USE_LOG)
749   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
750 #endif
751   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
752   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
753   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
754   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
755 #if defined (PETSC_USE_CTABLE)
756   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
757 #else
758   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
759 #endif
760   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
761   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
762   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
763   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
764   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
765   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
768   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
769   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
770   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
771   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
775 #if defined(PETSC_USE_REAL_MAT_SINGLE)
776   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
777 #endif
778   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
779   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
780   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
781   ierr = PetscFree(mat->data);CHKERRQ(ierr);
782 
783   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
784   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
785   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
786   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
794 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
795 {
796   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
797   PetscErrorCode ierr;
798   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
799   PetscScalar    *x,*from;
800 
801   PetscFunctionBegin;
802   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
803   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
804 
805   /* diagonal part */
806   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
807   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
808 
809   /* subdiagonal part */
810   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
811 
812   /* copy x into the vec slvec0 */
813   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
814   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
815 
816   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
817   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
818   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
819 
820   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
821   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822   /* superdiagonal part */
823   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
824   PetscFunctionReturn(0);
825 }
826 
827 #undef __FUNCT__
828 #define __FUNCT__ "MatMult_MPISBAIJ"
829 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
830 {
831   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
832   PetscErrorCode ierr;
833   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
834   PetscScalar    *x,*from;
835 
836   PetscFunctionBegin;
837   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
838   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
839 
840   /* diagonal part */
841   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
842   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
843 
844   /* subdiagonal part */
845   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
846 
847   /* copy x into the vec slvec0 */
848   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
849   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
850 
851   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
852   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
853   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
854 
855   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
856   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
857   /* superdiagonal part */
858   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
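
/*
   Algebra behind the two Mult routines above: only the upper triangle is stored,
   so the full product is assembled as
       y = A*x_loc + B*x_ghost + (B^T*x_loc contributions returned to their owners).
   slvec0 packs [x_loc ; B^T*x_loc]; the single forward ADD_VALUES scatter with
   sMvctx then both delivers x_loc to the ghost slots (slvec1b) of the processes
   that need it and accumulates the incoming B^T terms into slvec1a (initialized
   to A*x_loc), after which the final multadd applies B to the received ghost
   values and adds slvec1a.
*/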
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
864 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
865 {
866   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
867   PetscErrorCode ierr;
868   PetscInt       nt;
869 
870   PetscFunctionBegin;
871   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
872   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
873 
874   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
875   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
876 
877   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878   /* do diagonal part */
879   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
880   /* do superdiagonal part */
881   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
883   /* do subdiagonal part */
884   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
885   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
886   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
887 
888   PetscFunctionReturn(0);
889 }
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
893 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
894 {
895   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
896   PetscErrorCode ierr;
897   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
898   PetscScalar    *x,*from,zero=0.0;
899 
900   PetscFunctionBegin;
901   /*
902   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
903   PetscSynchronizedFlush(((PetscObject)A)->comm);
904   */
905   /* diagonal part */
906   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
907   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
908 
909   /* subdiagonal part */
910   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
911 
912   /* copy x into the vec slvec0 */
913   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
914   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
915   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
916   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
917 
918   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921 
922   /* superdiagonal part */
923   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
924 
925   PetscFunctionReturn(0);
926 }
927 
928 #undef __FUNCT__
929 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
930 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
931 {
932   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
933   PetscErrorCode ierr;
934 
935   PetscFunctionBegin;
936   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
937   /* do diagonal part */
938   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
939   /* do superdiagonal part */
940   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
942 
943   /* do subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
945   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
946   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
947 
948   PetscFunctionReturn(0);
949 }
950 
951 /*
952   This only works correctly for square matrices where the subblock A->A is the
953    diagonal block
954 */
955 #undef __FUNCT__
956 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
957 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
958 {
959   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
960   PetscErrorCode ierr;
961 
962   PetscFunctionBegin;
963   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
964   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatScale_MPISBAIJ"
970 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
971 {
972   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
973   PetscErrorCode ierr;
974 
975   PetscFunctionBegin;
976   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
977   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
978   PetscFunctionReturn(0);
979 }
980 
981 #undef __FUNCT__
982 #define __FUNCT__ "MatGetRow_MPISBAIJ"
983 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
984 {
985   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
986   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
987   PetscErrorCode ierr;
988   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
989   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
990   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
991 
992   PetscFunctionBegin;
993   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
994   mat->getrowactive = PETSC_TRUE;
995 
996   if (!mat->rowvalues && (idx || v)) {
997     /*
998         allocate enough space to hold information from the longest row.
999     */
1000     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1001     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1002     PetscInt     max = 1,mbs = mat->mbs,tmp;
1003     for (i=0; i<mbs; i++) {
1004       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1005       if (max < tmp) { max = tmp; }
1006     }
1007     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1008   }
1009 
1010   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1011   lrow = row - brstart;  /* local row index */
1012 
1013   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1014   if (!v)   {pvA = 0; pvB = 0;}
1015   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1016   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1017   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1018   nztot = nzA + nzB;
1019 
1020   cmap  = mat->garray;
1021   if (v  || idx) {
1022     if (nztot) {
1023       /* Sort by increasing column numbers, assuming A and B already sorted */
1024       PetscInt imark = -1;
1025       if (v) {
1026         *v = v_p = mat->rowvalues;
1027         for (i=0; i<nzB; i++) {
1028           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1029           else break;
1030         }
1031         imark = i;
1032         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1033         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1034       }
1035       if (idx) {
1036         *idx = idx_p = mat->rowindices;
1037         if (imark > -1) {
1038           for (i=0; i<imark; i++) {
1039             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1040           }
1041         } else {
1042           for (i=0; i<nzB; i++) {
1043             if (cmap[cworkB[i]/bs] < cstart)
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045             else break;
1046           }
1047           imark = i;
1048         }
1049         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1050         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1051       }
1052     } else {
1053       if (idx) *idx = 0;
1054       if (v)   *v   = 0;
1055     }
1056   }
1057   *nz = nztot;
1058   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1059   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1060   PetscFunctionReturn(0);
1061 }
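
/*
   Row-access sketch (kept out of compilation): for the symmetric format a caller
   brackets MatGetRow() with MatGetRowUpperTriangular() /
   MatRestoreRowUpperTriangular() (see the routines below) and may only ask for
   locally owned rows. The driver function is an assumed illustration.
*/
#if 0
static PetscErrorCode ExampleGetRow(Mat mat)
{
  PetscErrorCode    ierr;
  PetscInt          rstart,rend,ncols;
  const PetscInt    *cols;
  const PetscScalar *vals;

  PetscFunctionBegin;
  ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
  ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
  ierr = MatGetRow(mat,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
  /* ... inspect ncols, cols[], vals[] ... */
  ierr = MatRestoreRow(mat,rstart,&ncols,&cols,&vals);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif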
1062 
1063 #undef __FUNCT__
1064 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1065 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1066 {
1067   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1068 
1069   PetscFunctionBegin;
1070   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1071   baij->getrowactive = PETSC_FALSE;
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1077 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1078 {
1079   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1080   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1081 
1082   PetscFunctionBegin;
1083   aA->getrow_utriangular = PETSC_TRUE;
1084   PetscFunctionReturn(0);
1085 }
1086 #undef __FUNCT__
1087 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1088 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1089 {
1090   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1091   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1092 
1093   PetscFunctionBegin;
1094   aA->getrow_utriangular = PETSC_FALSE;
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1100 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1101 {
1102   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1103   PetscErrorCode ierr;
1104 
1105   PetscFunctionBegin;
1106   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1107   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1108   PetscFunctionReturn(0);
1109 }
1110 
1111 #undef __FUNCT__
1112 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1113 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1114 {
1115   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1116   PetscErrorCode ierr;
1117 
1118   PetscFunctionBegin;
1119   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1120   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 #undef __FUNCT__
1125 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1126 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1127 {
1128   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1129   PetscErrorCode ierr;
1130 
1131   PetscFunctionBegin;
1132   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1133   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1134   PetscFunctionReturn(0);
1135 }
1136 
1137 #undef __FUNCT__
1138 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1139 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1140 {
1141   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1142   Mat            A = a->A,B = a->B;
1143   PetscErrorCode ierr;
1144   PetscReal      isend[5],irecv[5];
1145 
1146   PetscFunctionBegin;
1147   info->block_size     = (PetscReal)matin->rmap->bs;
1148   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1149   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1150   isend[3] = info->memory;  isend[4] = info->mallocs;
1151   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1152   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1153   isend[3] += info->memory;  isend[4] += info->mallocs;
1154   if (flag == MAT_LOCAL) {
1155     info->nz_used      = isend[0];
1156     info->nz_allocated = isend[1];
1157     info->nz_unneeded  = isend[2];
1158     info->memory       = isend[3];
1159     info->mallocs      = isend[4];
1160   } else if (flag == MAT_GLOBAL_MAX) {
1161     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1162     info->nz_used      = irecv[0];
1163     info->nz_allocated = irecv[1];
1164     info->nz_unneeded  = irecv[2];
1165     info->memory       = irecv[3];
1166     info->mallocs      = irecv[4];
1167   } else if (flag == MAT_GLOBAL_SUM) {
1168     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1169     info->nz_used      = irecv[0];
1170     info->nz_allocated = irecv[1];
1171     info->nz_unneeded  = irecv[2];
1172     info->memory       = irecv[3];
1173     info->mallocs      = irecv[4];
1174   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1175   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1176   info->fill_ratio_needed = 0;
1177   info->factor_mallocs    = 0;
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1183 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1184 {
1185   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1186   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1187   PetscErrorCode ierr;
1188 
1189   PetscFunctionBegin;
1190   switch (op) {
1191   case MAT_NEW_NONZERO_LOCATIONS:
1192   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1193   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1194   case MAT_KEEP_NONZERO_PATTERN:
1195   case MAT_NEW_NONZERO_LOCATION_ERR:
1196     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1197     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1198     break;
1199   case MAT_ROW_ORIENTED:
1200     a->roworiented = flg;
1201     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1202     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1203     break;
1204   case MAT_NEW_DIAGONALS:
1205     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1206     break;
1207   case MAT_IGNORE_OFF_PROC_ENTRIES:
1208     a->donotstash = flg;
1209     break;
1210   case MAT_USE_HASH_TABLE:
1211     a->ht_flag = flg;
1212     break;
1213   case MAT_HERMITIAN:
1214     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1215     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1216     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1217     break;
1218   case MAT_SPD:
1219     A->spd_set                         = PETSC_TRUE;
1220     A->spd                             = flg;
1221     if (flg) {
1222       A->symmetric                     = PETSC_TRUE;
1223       A->structurally_symmetric        = PETSC_TRUE;
1224       A->symmetric_set                 = PETSC_TRUE;
1225       A->structurally_symmetric_set    = PETSC_TRUE;
1226     }
1227     break;
1228   case MAT_SYMMETRIC:
1229     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1230     break;
1231   case MAT_STRUCTURALLY_SYMMETRIC:
1232     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1233     break;
1234   case MAT_SYMMETRY_ETERNAL:
1235     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1236     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1237     break;
1238   case MAT_IGNORE_LOWER_TRIANGULAR:
1239     aA->ignore_ltriangular = flg;
1240     break;
1241   case MAT_ERROR_LOWER_TRIANGULAR:
1242     aA->ignore_ltriangular = flg;
1243     break;
1244   case MAT_GETROW_UPPERTRIANGULAR:
1245     aA->getrow_utriangular = flg;
1246     break;
1247   default:
1248     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1249   }
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1255 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1256 {
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1261     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1262   }
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1268 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1269 {
1270   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1271   Mat            a=baij->A, b=baij->B;
1272   PetscErrorCode ierr;
1273   PetscInt       nv,m,n;
1274   PetscBool      flg;
1275 
1276   PetscFunctionBegin;
1277   if (ll != rr) {
1278     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1279     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1280   }
1281   if (!ll) PetscFunctionReturn(0);
1282 
1283   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1284   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1285 
1286   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1287   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1288 
1289   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1290 
1291   /* left diagonalscale the off-diagonal part */
1292   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1293 
1294   /* scale the diagonal part */
1295   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1296 
1297   /* right diagonalscale the off-diagonal part */
1298   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1299   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1300   PetscFunctionReturn(0);
1301 }
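
/*
   Scaling sketch (kept out of compilation): the routine above requires ll == rr,
   so the operation is the symmetry-preserving scaling A <- diag(d) A diag(d).
   The vector d is assumed for illustration.
*/
#if 0
static PetscErrorCode ExampleSymmetricScale(Mat mat,Vec d)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr); /* same vector on both sides */
  PetscFunctionReturn(0);
}
#endif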
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1305 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1306 {
1307   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1308   PetscErrorCode ierr;
1309 
1310   PetscFunctionBegin;
1311   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatEqual_MPISBAIJ"
1319 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1320 {
1321   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1322   Mat            a,b,c,d;
1323   PetscBool      flg;
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   a = matA->A; b = matA->B;
1328   c = matB->A; d = matB->B;
1329 
1330   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1331   if (flg) {
1332     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1333   }
1334   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatCopy_MPISBAIJ"
1340 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1341 {
1342   PetscErrorCode ierr;
1343   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1344   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1345 
1346   PetscFunctionBegin;
1347   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1348   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1349     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1350     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1351     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1352   } else {
1353     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1354     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1355   }
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1361 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1362 {
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1372 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1373 {
1374   PetscErrorCode ierr;
1375   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1376   PetscBLASInt   bnz,one=1;
1377   Mat_SeqSBAIJ   *xa,*ya;
1378   Mat_SeqBAIJ    *xb,*yb;
1379 
1380   PetscFunctionBegin;
1381   if (str == SAME_NONZERO_PATTERN) {
1382     PetscScalar alpha = a;
1383     xa = (Mat_SeqSBAIJ *)xx->A->data;
1384     ya = (Mat_SeqSBAIJ *)yy->A->data;
1385     bnz = PetscBLASIntCast(xa->nz);
1386     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1387     xb = (Mat_SeqBAIJ *)xx->B->data;
1388     yb = (Mat_SeqBAIJ *)yy->B->data;
1389     bnz = PetscBLASIntCast(xb->nz);
1390     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1391   } else {
1392     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1393     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1394     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1395   }
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 #undef __FUNCT__
1400 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1401 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1402 {
1403   PetscErrorCode ierr;
1404   PetscInt       i;
1405   PetscBool      flg;
1406 
1407   PetscFunctionBegin;
1408   for (i=0; i<n; i++) {
1409     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1410     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1411   }
1412   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 
1417 /* -------------------------------------------------------------------*/
1418 static struct _MatOps MatOps_Values = {
1419         MatSetValues_MPISBAIJ,
1420         MatGetRow_MPISBAIJ,
1421         MatRestoreRow_MPISBAIJ,
1422         MatMult_MPISBAIJ,
1423 /*  4*/ MatMultAdd_MPISBAIJ,
1424         MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones: the matrix is symmetric */
1425         MatMultAdd_MPISBAIJ,
1426         0,
1427         0,
1428         0,
1429 /* 10*/ 0,
1430         0,
1431         0,
1432         MatSOR_MPISBAIJ,
1433         MatTranspose_MPISBAIJ,
1434 /* 15*/ MatGetInfo_MPISBAIJ,
1435         MatEqual_MPISBAIJ,
1436         MatGetDiagonal_MPISBAIJ,
1437         MatDiagonalScale_MPISBAIJ,
1438         MatNorm_MPISBAIJ,
1439 /* 20*/ MatAssemblyBegin_MPISBAIJ,
1440         MatAssemblyEnd_MPISBAIJ,
1441         MatSetOption_MPISBAIJ,
1442         MatZeroEntries_MPISBAIJ,
1443 /* 24*/ 0,
1444         0,
1445         0,
1446         0,
1447         0,
1448 /* 29*/ MatSetUp_MPISBAIJ,
1449         0,
1450         0,
1451         0,
1452         0,
1453 /* 34*/ MatDuplicate_MPISBAIJ,
1454         0,
1455         0,
1456         0,
1457         0,
1458 /* 39*/ MatAXPY_MPISBAIJ,
1459         MatGetSubMatrices_MPISBAIJ,
1460         MatIncreaseOverlap_MPISBAIJ,
1461         MatGetValues_MPISBAIJ,
1462         MatCopy_MPISBAIJ,
1463 /* 44*/ 0,
1464         MatScale_MPISBAIJ,
1465         0,
1466         0,
1467         0,
1468 /* 49*/ 0,
1469         0,
1470         0,
1471         0,
1472         0,
1473 /* 54*/ 0,
1474         0,
1475         MatSetUnfactored_MPISBAIJ,
1476         0,
1477         MatSetValuesBlocked_MPISBAIJ,
1478 /* 59*/ 0,
1479         0,
1480         0,
1481         0,
1482         0,
1483 /* 64*/ 0,
1484         0,
1485         0,
1486         0,
1487         0,
1488 /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1489         0,
1490         0,
1491         0,
1492         0,
1493 /* 74*/ 0,
1494         0,
1495         0,
1496         0,
1497         0,
1498 /* 79*/ 0,
1499         0,
1500         0,
1501         0,
1502         MatLoad_MPISBAIJ,
1503 /* 84*/ 0,
1504         0,
1505         0,
1506         0,
1507         0,
1508 /* 89*/ 0,
1509         0,
1510         0,
1511         0,
1512         0,
1513 /* 94*/ 0,
1514         0,
1515         0,
1516         0,
1517         0,
1518 /* 99*/ 0,
1519         0,
1520         0,
1521         0,
1522         0,
1523 /*104*/ 0,
1524         MatRealPart_MPISBAIJ,
1525         MatImaginaryPart_MPISBAIJ,
1526         MatGetRowUpperTriangular_MPISBAIJ,
1527         MatRestoreRowUpperTriangular_MPISBAIJ,
1528 /*109*/ 0,
1529         0,
1530         0,
1531         0,
1532         0,
1533 /*114*/ 0,
1534         0,
1535         0,
1536         0,
1537         0,
1538 /*119*/ 0,
1539         0,
1540         0,
1541         0
1542 };
1543 
1544 
1545 EXTERN_C_BEGIN
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1548 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1549 {
1550   PetscFunctionBegin;
1551   *a = ((Mat_MPISBAIJ *)A->data)->A;
1552   PetscFunctionReturn(0);
1553 }
1554 EXTERN_C_END
1555 
1556 EXTERN_C_BEGIN
1557 #undef __FUNCT__
1558 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1559 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1560 {
1561   Mat_MPISBAIJ   *b;
1562   PetscErrorCode ierr;
1563   PetscInt       i,mbs,Mbs;
1564 
1565   PetscFunctionBegin;
1566   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1567   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1568   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1569   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1570 
1571   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1572   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1573   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1574   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1575   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1576 
1577   if (d_nnz) {
1578     for (i=0; i<B->rmap->n/bs; i++) {
1579       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1580     }
1581   }
1582   if (o_nnz) {
1583     for (i=0; i<B->rmap->n/bs; i++) {
1584       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1585     }
1586   }
1587 
1588   b   = (Mat_MPISBAIJ*)B->data;
1589   mbs = B->rmap->n/bs;
1590   Mbs = B->rmap->N/bs;
1591   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1592 
1593   B->rmap->bs  = bs;
1594   b->bs2 = bs*bs;
1595   b->mbs = mbs;
1596   b->nbs = mbs;
1597   b->Mbs = Mbs;
1598   b->Nbs = Mbs;
1599 
1600   for (i=0; i<=b->size; i++) {
1601     b->rangebs[i] = B->rmap->range[i]/bs;
1602   }
1603   b->rstartbs = B->rmap->rstart/bs;
1604   b->rendbs   = B->rmap->rend/bs;
1605 
1606   b->cstartbs = B->cmap->rstart/bs;
1607   b->cendbs   = B->cmap->rend/bs;
1608 
1609   if (!B->preallocated) {
1610     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1611     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1612     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1613     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1614     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1615     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1616     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1617     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1618     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1619   }
1620 
1621   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1622   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1623   B->preallocated = PETSC_TRUE;
1624   PetscFunctionReturn(0);
1625 }
1626 EXTERN_C_END
1627 
1628 EXTERN_C_BEGIN
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1631 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1632 {
1633   PetscInt       m,rstart,cstart,cend;
1634   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1635   const PetscInt *JJ=0;
1636   PetscScalar    *values=0;
1637   PetscErrorCode ierr;
1638 
1639   PetscFunctionBegin;
1640   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1641   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1642   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1643   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1644   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1645   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1646   m      = B->rmap->n/bs;
1647   rstart = B->rmap->rstart/bs;
1648   cstart = B->cmap->rstart/bs;
1649   cend   = B->cmap->rend/bs;
1650 
1651   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1652   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1653   for (i=0; i<m; i++) {
1654     nz = ii[i+1] - ii[i];
1655     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1656     nz_max = PetscMax(nz_max,nz);
1657     JJ  = jj + ii[i];
1658     for (j=0; j<nz; j++) {       /* skip block columns to the left of this process's diagonal portion */
1659       if (*JJ >= cstart) break;
1660       JJ++;
1661     }
1662     d = 0;
1663     for (; j<nz; j++) {          /* count block columns in [cstart,cend) as diagonal-portion entries */
1664       if (*JJ++ >= cend) break;
1665       d++;
1666     }
1667     d_nnz[i] = d;
1668     o_nnz[i] = nz - d;           /* the remainder lie in the off-diagonal portion */
1669   }
1670   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1671   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1672 
1673   values = (PetscScalar*)V;
1674   if (!values) {
1675     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1676     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1677   }
1678   for (i=0; i<m; i++) {
1679     PetscInt          row    = i + rstart;
1680     PetscInt          ncols  = ii[i+1] - ii[i];
1681     const PetscInt    *icols = jj + ii[i];
1682     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1683     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1684   }
1685 
1686   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1687   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1688   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1689   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1690   PetscFunctionReturn(0);
1691 }
1692 EXTERN_C_END
1693 
1694 EXTERN_C_BEGIN
1695 #if defined(PETSC_HAVE_MUMPS)
1696 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1697 #endif
1698 #if defined(PETSC_HAVE_PASTIX)
1699 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1700 #endif
1701 EXTERN_C_END
1702 
1703 /*MC
1704    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1705    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1706    the matrix is stored.
1707 
1708   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1709   can call MatSetOption(Mat,MAT_HERMITIAN,PETSC_TRUE).
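
       A minimal usage sketch (the communicator, sizes, and preallocation values below are placeholders):
     .vb
       MatCreate(comm,&A);
       MatSetSizes(A,m,n,M,N);
       MatSetType(A,MATMPISBAIJ);
       MatMPISBAIJSetPreallocation(A,bs,d_nz,d_nnz,o_nz,o_nnz);
       MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);
     .ve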
1710 
1711    Options Database Keys:
1712 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1713 
1714   Level: beginner
1715 
1716 .seealso: MatCreateMPISBAIJ
1717 M*/
1718 
1719 EXTERN_C_BEGIN
1720 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1721 EXTERN_C_END
1722 
1723 EXTERN_C_BEGIN
1724 #undef __FUNCT__
1725 #define __FUNCT__ "MatCreate_MPISBAIJ"
1726 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1727 {
1728   Mat_MPISBAIJ   *b;
1729   PetscErrorCode ierr;
1730   PetscBool      flg;
1731 
1732   PetscFunctionBegin;
1733   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1734   B->data = (void*)b;
1735   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1736 
1737   B->ops->destroy    = MatDestroy_MPISBAIJ;
1738   B->ops->view       = MatView_MPISBAIJ;
1739   B->assembled       = PETSC_FALSE;
1740 
1741   B->insertmode = NOT_SET_VALUES;
1742   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1743   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1744 
1745   /* build local table of row and column ownerships */
1746   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1747 
1748   /* build cache for off array entries formed */
1749   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1750   b->donotstash  = PETSC_FALSE;
1751   b->colmap      = PETSC_NULL;
1752   b->garray      = PETSC_NULL;
1753   b->roworiented = PETSC_TRUE;
1754 
1755   /* stuff used in block assembly */
1756   b->barray       = 0;
1757 
1758   /* stuff used for matrix vector multiply */
1759   b->lvec         = 0;
1760   b->Mvctx        = 0;
1761   b->slvec0       = 0;
1762   b->slvec0b      = 0;
1763   b->slvec1       = 0;
1764   b->slvec1a      = 0;
1765   b->slvec1b      = 0;
1766   b->sMvctx       = 0;
1767 
1768   /* stuff for MatGetRow() */
1769   b->rowindices   = 0;
1770   b->rowvalues    = 0;
1771   b->getrowactive = PETSC_FALSE;
1772 
1773   /* hash table stuff */
1774   b->ht           = 0;
1775   b->hd           = 0;
1776   b->ht_size      = 0;
1777   b->ht_flag      = PETSC_FALSE;
1778   b->ht_fact      = 0;
1779   b->ht_total_ct  = 0;
1780   b->ht_insert_ct = 0;
1781 
1782   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1783   b->ijonly       = PETSC_FALSE;
1784 
1785   b->in_loc       = 0;
1786   b->v_loc        = 0;
1787   b->n_loc        = 0;
1788   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1789     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1790     if (flg) {
1791       PetscReal fact = 1.39;
1792       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1793       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1794       if (fact <= 1.0) fact = 1.39;
1795       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1796       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1797     }
1798   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1799 
1800 #if defined(PETSC_HAVE_PASTIX)
1801   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1802                                            "MatGetFactor_mpisbaij_pastix",
1803                                            MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1804 #endif
1805 #if defined(PETSC_HAVE_MUMPS)
1806   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1807                                      "MatGetFactor_sbaij_mumps",
1808                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1809 #endif
1810   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1811                                      "MatStoreValues_MPISBAIJ",
1812                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1814                                      "MatRetrieveValues_MPISBAIJ",
1815                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1816   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1817                                      "MatGetDiagonalBlock_MPISBAIJ",
1818                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1819   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1820                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1821                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1823                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1824                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1825   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1826                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1827                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1828 
1829   B->symmetric                  = PETSC_TRUE;
1830   B->structurally_symmetric     = PETSC_TRUE;
1831   B->symmetric_set              = PETSC_TRUE;
1832   B->structurally_symmetric_set = PETSC_TRUE;
1833   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1834   PetscFunctionReturn(0);
1835 }
1836 EXTERN_C_END
1837 
1838 /*MC
1839    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1840 
1841    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1842    and MATMPISBAIJ otherwise.
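
        A minimal sketch (names are placeholders); calling MatSetFromOptions(A) in place of
        MatSetType() lets the type be selected at runtime with -mat_type sbaij:
     .vb
        MatCreate(comm,&A);
        MatSetSizes(A,m,n,M,N);
        MatSetType(A,MATSBAIJ);
     .ve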
1843 
1844    Options Database Keys:
1845 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1846 
1847   Level: beginner
1848 
1849 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1850 M*/
1851 
1852 #undef __FUNCT__
1853 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1854 /*@C
1855    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1856    the user should preallocate the matrix storage by setting the parameters
1857    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1858    performance can be increased by more than a factor of 50.
1859 
1860    Collective on Mat
1861 
1862    Input Parameters:
1863 +  A - the matrix
1864 .  bs   - size of block
1865 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1866            submatrix  (same for all local rows)
1867 .  d_nnz - array containing the number of block nonzeros in the various block rows
1868            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1869            (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1870            for the diagonal entry and set a value even if it is zero.
1871 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1872            submatrix (same for all local rows).
1873 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1874            off-diagonal portion of the local submatrix that is right of the diagonal
1875            (possibly different for each block row) or PETSC_NULL.
1876 
1877 
1878    Options Database Keys:
1879 .   -mat_no_unroll - uses code that does not unroll the loops in the
1880                      block calculations (much slower)
1881 .   -mat_block_size - size of the blocks to use
1882 
1883    Notes:
1884 
1885    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1886    then it must be used on all processors that share the object for that argument.
1887 
1888    If the *_nnz parameter is given then the *_nz parameter is ignored
1889 
1890    Storage Information:
1891    For a square global matrix we define each processor's diagonal portion
1892    to be its local rows and the corresponding columns (a square submatrix);
1893    each processor's off-diagonal portion encompasses the remainder of the
1894    local matrix (a rectangular submatrix).
1895 
1896    The user can specify preallocated storage for the diagonal part of
1897    the local submatrix with either d_nz or d_nnz (not both).  Set
1898    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1899    memory allocation.  Likewise, specify preallocated storage for the
1900    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1901 
1902    You can call MatGetInfo() to get information on how effective the preallocation was;
1903    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1904    You can also run with the option -info and look for messages with the string
1905    malloc in them to see if additional memory allocation was needed.
1906 
1907    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1908    the figure below we depict these three local rows and all columns (0-11).
1909 
1910 .vb
1911            0 1 2 3 4 5 6 7 8 9 10 11
1912           -------------------
1913    row 3  |  . . . d d d o o o o o o
1914    row 4  |  . . . d d d o o o o o o
1915    row 5  |  . . . d d d o o o o o o
1916           -------------------
1917 .ve
1918 
1919    Thus, any entries in the d locations are stored in the d (diagonal)
1920    submatrix, and any entries in the o locations are stored in the
1921    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1922    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1923 
1924    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1925    plus the diagonal part of the d matrix,
1926    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1927 
1928    In general, for PDE problems in which most nonzeros are near the diagonal,
1929    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1930    or you will get TERRIBLE performance; see the users' manual chapter on
1931    matrices.
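
        As a concrete sketch for the figure above (with bs=3, each group of d's or o's is one
        3x3 block): the depicted process owns one block row, with 1 block in its d portion and
        2 blocks in its o portion, so it could preallocate with
     .vb
        MatMPISBAIJSetPreallocation(A,3,1,PETSC_NULL,2,PETSC_NULL);
     .ve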
1932 
1933    Level: intermediate
1934 
1935 .keywords: matrix, block, aij, compressed row, sparse, parallel
1936 
1937 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1938 @*/
1939 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1940 {
1941   PetscErrorCode ierr;
1942 
1943   PetscFunctionBegin;
1944   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1945   PetscValidType(B,1);
1946   PetscValidLogicalCollectiveInt(B,bs,2);
1947   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1948   PetscFunctionReturn(0);
1949 }
1950 
1951 #undef __FUNCT__
1952 #define __FUNCT__ "MatCreateSBAIJ"
1953 /*@C
1954    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1955    (block compressed row).  For good matrix assembly performance
1956    the user should preallocate the matrix storage by setting the parameters
1957    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1958    performance can be increased by more than a factor of 50.
1959 
1960    Collective on MPI_Comm
1961 
1962    Input Parameters:
1963 +  comm - MPI communicator
1964 .  bs   - size of block
1965 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1966            This value should be the same as the local size used in creating the
1967            y vector for the matrix-vector product y = Ax.
1968 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1969            This value should be the same as the local size used in creating the
1970            x vector for the matrix-vector product y = Ax.
1971 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1972 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1973 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1974            submatrix  (same for all local rows)
1975 .  d_nnz - array containing the number of block nonzeros in the various block rows
1976            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1977            (possibly different for each block row) or PETSC_NULL.
1978            If you plan to factor the matrix you must leave room for the diagonal entry and
1979            set its value even if it is zero.
1980 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1981            submatrix (same for all local rows).
1982 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1983            off-diagonal portion of the local submatrix (possibly different for
1984            each block row) or PETSC_NULL.
1985 
1986    Output Parameter:
1987 .  A - the matrix
1988 
1989    Options Database Keys:
1990 .   -mat_no_unroll - uses code that does not unroll the loops in the
1991                      block calculations (much slower)
1992 .   -mat_block_size - size of the blocks to use
1993 .   -mat_mpi - use the parallel matrix data structures even on one processor
1994                (defaults to using SeqSBAIJ format on one processor)
1995 
1996    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1997    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1998    [MatXXXXSetPreallocation() is, for example, MatMPISBAIJSetPreallocation()]
1999 
2000    Notes:
2001    The number of rows and columns must be divisible by blocksize.
2002    This matrix type does not support complex Hermitian operations.
2003 
2004    The user MUST specify either the local or global matrix dimensions
2005    (possibly both).
2006 
2007    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2008    then it must be used on all processors that share the object for that argument.
2009 
2010    If the *_nnz parameter is given then the *_nz parameter is ignored
2011 
2012    Storage Information:
2013    For a square global matrix we define each processor's diagonal portion
2014    to be its local rows and the corresponding columns (a square submatrix);
2015    each processor's off-diagonal portion encompasses the remainder of the
2016    local matrix (a rectangular submatrix).
2017 
2018    The user can specify preallocated storage for the diagonal part of
2019    the local submatrix with either d_nz or d_nnz (not both).  Set
2020    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2021    memory allocation.  Likewise, specify preallocated storage for the
2022    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2023 
2024    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2025    the figure below we depict these three local rows and all columns (0-11).
2026 
2027 .vb
2028            0 1 2 3 4 5 6 7 8 9 10 11
2029           -------------------
2030    row 3  |  . . . d d d o o o o o o
2031    row 4  |  . . . d d d o o o o o o
2032    row 5  |  . . . d d d o o o o o o
2033           -------------------
2034 .ve
2035 
2036    Thus, any entries in the d locations are stored in the d (diagonal)
2037    submatrix, and any entries in the o locations are stored in the
2038    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2039    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2040 
2041    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2042    plus the diagonal part of the d matrix,
2043    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2044    In general, for PDE problems in which most nonzeros are near the diagonal,
2045    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2046    or you will get TERRIBLE performance; see the users' manual chapter on
2047    matrices.
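
        A minimal creation sketch (the block size, sizes, and preallocation values are
        placeholders; the MatCreate()/MatSetType()/MatXXXXSetPreallocation() paradigm noted
        above is generally preferred):
     .vb
        MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,1,PETSC_NULL,1,PETSC_NULL,&A);
     .ve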
2048 
2049    Level: intermediate
2050 
2051 .keywords: matrix, block, aij, compressed row, sparse, parallel
2052 
2053 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2054 @*/
2055 
2056 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2057 {
2058   PetscErrorCode ierr;
2059   PetscMPIInt    size;
2060 
2061   PetscFunctionBegin;
2062   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2063   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2064   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2065   if (size > 1) {
2066     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2067     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2068   } else {
2069     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2070     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2071   }
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 
2076 #undef __FUNCT__
2077 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2078 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2079 {
2080   Mat            mat;
2081   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2082   PetscErrorCode ierr;
2083   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2084   PetscScalar    *array;
2085 
2086   PetscFunctionBegin;
2087   *newmat       = 0;
2088   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2089   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2090   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2091   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2092   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2093   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2094 
2095   mat->factortype   = matin->factortype;
2096   mat->preallocated = PETSC_TRUE;
2097   mat->assembled    = PETSC_TRUE;
2098   mat->insertmode   = NOT_SET_VALUES;
2099 
2100   a = (Mat_MPISBAIJ*)mat->data;
2101   a->bs2   = oldmat->bs2;
2102   a->mbs   = oldmat->mbs;
2103   a->nbs   = oldmat->nbs;
2104   a->Mbs   = oldmat->Mbs;
2105   a->Nbs   = oldmat->Nbs;
2106 
2107 
2108   a->size         = oldmat->size;
2109   a->rank         = oldmat->rank;
2110   a->donotstash   = oldmat->donotstash;
2111   a->roworiented  = oldmat->roworiented;
2112   a->rowindices   = 0;
2113   a->rowvalues    = 0;
2114   a->getrowactive = PETSC_FALSE;
2115   a->barray       = 0;
2116   a->rstartbs    = oldmat->rstartbs;
2117   a->rendbs      = oldmat->rendbs;
2118   a->cstartbs    = oldmat->cstartbs;
2119   a->cendbs      = oldmat->cendbs;
2120 
2121   /* hash table stuff */
2122   a->ht           = 0;
2123   a->hd           = 0;
2124   a->ht_size      = 0;
2125   a->ht_flag      = oldmat->ht_flag;
2126   a->ht_fact      = oldmat->ht_fact;
2127   a->ht_total_ct  = 0;
2128   a->ht_insert_ct = 0;
2129 
2130   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2131   if (oldmat->colmap) {
2132 #if defined (PETSC_USE_CTABLE)
2133     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2134 #else
2135     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2136     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2137     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2138 #endif
2139   } else a->colmap = 0;
2140 
2141   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2142     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2143     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2144     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2145   } else a->garray = 0;
2146 
2147   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2148   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2149   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2150   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2151   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2152 
2153   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2154   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2155   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2157 
2158   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2159   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2160   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2161   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2162   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2163   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2164   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2165   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2166   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2167   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2168   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2169   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2170   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2171 
2172   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2173   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2174   a->sMvctx = oldmat->sMvctx;
2175   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2176 
2177   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2178   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2179   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2180   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2181   ierr = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2182   *newmat = mat;
2183   PetscFunctionReturn(0);
2184 }
2185 
2186 #undef __FUNCT__
2187 #define __FUNCT__ "MatLoad_MPISBAIJ"
2188 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2189 {
2190   PetscErrorCode ierr;
2191   PetscInt       i,nz,j,rstart,rend;
2192   PetscScalar    *vals,*buf;
2193   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2194   MPI_Status     status;
2195   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2196   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2197   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2198   PetscInt       bs=1,Mbs,mbs,extra_rows;
2199   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2200   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2201   int            fd;
2202 
2203   PetscFunctionBegin;
2204   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2205     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2206   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2207 
2208   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2209   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2210   if (!rank) {
2211     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2212     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2213     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object in file");
2214     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2215   }
2216 
2217   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2218 
2219   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2220   M = header[1]; N = header[2];
2221 
2222   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2223   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2224   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2225 
2226   /* If global sizes are set, check if they are consistent with that given in the file */
2227   if (sizesset) {
2228     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2229   }
2230   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2231   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2232 
2233   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2234 
2235   /*
2236      This code adds extra rows to make sure the number of rows is
2237      divisible by the blocksize
2238   */
2239   Mbs        = M/bs;
2240   extra_rows = bs - M + bs*(Mbs);
2241   if (extra_rows == bs) extra_rows = 0;
2242   else                  Mbs++;
2243   if (extra_rows && !rank) {
2244     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2245   }
2246 
2247   /* determine ownership of all rows */
2248   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2249     mbs        = Mbs/size + ((Mbs % size) > rank);
2250     m          = mbs*bs;
2251   } else { /* User Set */
2252     m          = newmat->rmap->n;
2253     mbs        = m/bs;
2254   }
2255   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2256   mmbs       = PetscMPIIntCast(mbs);
2257   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2258   rowners[0] = 0;
2259   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2260   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2261   rstart = rowners[rank];
2262   rend   = rowners[rank+1];
2263 
2264   /* distribute row lengths to all processors */
2265   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2266   if (!rank) {
2267     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2268     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2269     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2270     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2271     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2272     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2273     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2274   } else {
2275     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2276   }
2277 
2278   if (!rank) {   /* procs[0] */
2279     /* calculate the number of nonzeros on each processor */
2280     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2281     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2282     for (i=0; i<size; i++) {
2283       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2284         procsnz[i] += rowlengths[j];
2285       }
2286     }
2287     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2288 
2289     /* determine max buffer needed and allocate it */
2290     maxnz = 0;
2291     for (i=0; i<size; i++) {
2292       maxnz = PetscMax(maxnz,procsnz[i]);
2293     }
2294     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2295 
2296     /* read in my part of the matrix column indices  */
2297     nz     = procsnz[0];
2298     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2299     mycols = ibuf;
2300     if (size == 1)  nz -= extra_rows;
2301     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2302     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2303 
2304     /* read in the other processors' parts (except the last) and ship them off */
2305     for (i=1; i<size-1; i++) {
2306       nz   = procsnz[i];
2307       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2308       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2309     }
2310     /* read in the stuff for the last proc */
2311     if (size != 1) {
2312       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2313       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2314       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2315       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2316     }
2317     ierr = PetscFree(cols);CHKERRQ(ierr);
2318   } else {  /* procs[i], i>0 */
2319     /* determine buffer space needed for message */
2320     nz = 0;
2321     for (i=0; i<m; i++) {
2322       nz += locrowlens[i];
2323     }
2324     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2325     mycols = ibuf;
2326     /* receive message of column indices */
2327     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2328     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2329     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: unexpected message length");
2330   }
2331 
2332   /* loop over local rows, determining number of off diagonal entries */
2333   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2334   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2335   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2336   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2337   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2338   rowcount = 0;
2339   nzcount  = 0;
2340   for (i=0; i<mbs; i++) {
2341     dcount  = 0;
2342     odcount = 0;
2343     for (j=0; j<bs; j++) {
2344       kmax = locrowlens[rowcount];
2345       for (k=0; k<kmax; k++) {
2346         tmp = mycols[nzcount++]/bs; /* block col. index */
2347         if (!mask[tmp]) {
2348           mask[tmp] = 1;
2349           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2350           else masked1[dcount++] = tmp; /* entry in diag portion */
2351         }
2352       }
2353       rowcount++;
2354     }
2355 
2356     dlens[i]  = dcount;  /* d_nzz[i] */
2357     odlens[i] = odcount; /* o_nzz[i] */
2358 
2359     /* zero out the mask elements we set */
2360     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2361     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2362   }
2363   if (!sizesset) {
2364     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2365   }
2366   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2367   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2368 
2369   if (!rank) {
2370     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2371     /* read in my part of the matrix numerical values  */
2372     nz = procsnz[0];
2373     vals = buf;
2374     mycols = ibuf;
2375     if (size == 1)  nz -= extra_rows;
2376     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2377     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2378 
2379     /* insert into matrix */
2380     jj      = rstart*bs;
2381     for (i=0; i<m; i++) {
2382       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2383       mycols += locrowlens[i];
2384       vals   += locrowlens[i];
2385       jj++;
2386     }
2387 
2388     /* read in other processors (except the last one) and ship out */
2389     for (i=1; i<size-1; i++) {
2390       nz   = procsnz[i];
2391       vals = buf;
2392       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2393       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2394     }
2395     /* the last proc */
2396     if (size != 1) {
2397       nz   = procsnz[i] - extra_rows;
2398       vals = buf;
2399       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2400       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2401       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2402     }
2403     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2404 
2405   } else {
2406     /* receive numeric values */
2407     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2408 
2409     /* receive message of values */
2410     vals   = buf;
2411     mycols = ibuf;
2412     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2413     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2414     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file: unexpected message length");
2415 
2416     /* insert into matrix */
2417     jj      = rstart*bs;
2418     for (i=0; i<m; i++) {
2419       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2420       mycols += locrowlens[i];
2421       vals   += locrowlens[i];
2422       jj++;
2423     }
2424   }
2425 
2426   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2427   ierr = PetscFree(buf);CHKERRQ(ierr);
2428   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2429   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2430   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2431   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2432   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2433   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2434   PetscFunctionReturn(0);
2435 }
2436 
2437 #undef __FUNCT__
2438 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2439 /*XXXXX@
2440    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2441 
2442    Input Parameters:
2443 .  mat  - the matrix
2444 .  fact - factor
2445 
2446    Not Collective on Mat, each process can have a different hash factor
2447 
2448    Level: advanced
2449 
2450   Notes:
2451    This can also be set by the command line option: -mat_use_hash_table fact
2452 
2453 .keywords: matrix, hashtable, factor, HT
2454 
2455 .seealso: MatSetOption()
2456 @XXXXX*/
2457 
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2461 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2462 {
2463   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2464   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2465   PetscReal      atmp;
2466   PetscReal      *work,*svalues,*rvalues;
2467   PetscErrorCode ierr;
2468   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2469   PetscMPIInt    rank,size;
2470   PetscInt       *rowners_bs,dest,count,source;
2471   PetscScalar    *va;
2472   MatScalar      *ba;
2473   MPI_Status     stat;
2474 
2475   PetscFunctionBegin;
2476   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2477   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2478   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2479 
2480   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2481   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2482 
2483   bs   = A->rmap->bs;
2484   mbs  = a->mbs;
2485   Mbs  = a->Mbs;
2486   ba   = b->a;
2487   bi   = b->i;
2488   bj   = b->j;
2489 
2490   /* find ownerships */
2491   rowners_bs = A->rmap->range;
2492 
2493   /* each proc creates an array to be distributed */
2494   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2495   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2496 
2497   /* row_max for B */
2498   if (rank != size-1) {
2499     for (i=0; i<mbs; i++) {
2500       ncols = bi[1] - bi[0]; bi++;
2501       brow  = bs*i;
2502       for (j=0; j<ncols; j++) {
2503         bcol = bs*(*bj);
2504         for (kcol=0; kcol<bs; kcol++) {
2505           col = bcol + kcol;                 /* local col index */
2506           col += rowners_bs[rank+1];      /* global col index */
2507           for (krow=0; krow<bs; krow++) {
2508             atmp = PetscAbsScalar(*ba); ba++;
2509             row = brow + krow;    /* local row index */
2510             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2511             if (work[col] < atmp) work[col] = atmp;
2512           }
2513         }
2514         bj++;
2515       }
2516     }
2517 
2518     /* send values to its owners */
2519     for (dest=rank+1; dest<size; dest++) {
2520       svalues = work + rowners_bs[dest];
2521       count   = rowners_bs[dest+1]-rowners_bs[dest];
2522       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2523     }
2524   }
2525 
2526   /* receive values */
2527   if (rank) {
2528     rvalues = work;
2529     count   = rowners_bs[rank+1]-rowners_bs[rank];
2530     for (source=0; source<rank; source++) {
2531       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2532       /* process values */
2533       for (i=0; i<count; i++) {
2534         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2535       }
2536     }
2537   }
2538 
2539   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2540   ierr = PetscFree(work);CHKERRQ(ierr);
2541   PetscFunctionReturn(0);
2542 }
2543 
2544 #undef __FUNCT__
2545 #define __FUNCT__ "MatSOR_MPISBAIJ"
2546 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2547 {
2548   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2549   PetscErrorCode    ierr;
2550   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2551   PetscScalar       *x,*ptr,*from;
2552   Vec               bb1;
2553   const PetscScalar *b;
2554 
2555   PetscFunctionBegin;
2556   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2557   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2558 
2559   if (flag == SOR_APPLY_UPPER) {
2560     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2561     PetscFunctionReturn(0);
2562   }
2563 
2564   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2565     if (flag & SOR_ZERO_INITIAL_GUESS) {
2566       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2567       its--;
2568     }
2569 
2570     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2571     while (its--) {
2572 
2573       /* lower triangular part: slvec0b = - B^T*xx */
2574       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2575 
2576       /* copy xx into slvec0a */
2577       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2578       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2579       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2580       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2581 
2582       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2583 
2584       /* copy bb into slvec1a */
2585       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2586       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2587       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2588       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2589 
2590       /* set slvec1b = 0 */
2591       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2592 
2593       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2594       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2595       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2596       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2597 
2598       /* upper triangular part: bb1 = bb1 - B*x */
2599       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2600 
2601       /* local diagonal sweep */
2602       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2603     }
2604     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2605   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2607   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2608     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2609   } else if (flag & SOR_EISENSTAT) {
2610     Vec               xx1;
2611     PetscBool         hasop;
2612     const PetscScalar *diag;
2613     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2614     PetscInt          i,n;
2615 
2616     if (!mat->xx1) {
2617       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2618       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2619     }
2620     xx1 = mat->xx1;
2621     bb1 = mat->bb1;
2622 
2623     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2624 
2625     if (!mat->diag) {
2626       /* this is wrong for same matrix with new nonzero values */
2627       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2628       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2629     }
2630     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2631 
2632     if (hasop) {
2633       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2634       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2635     } else {
2636       /*
2637           These two lines are replaced by code that may be a bit faster for a good compiler
2638       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2639       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2640       */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
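      /* for omega == 1.0 we have scale == -1, so the multiply by scale folds into the subtraction */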
      if (omega == 1.0) {
        for (i=0; i<n; i++) {
          sl[i] = b[i] - diag[i]*x[i];
        }
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) {
          sl[i] = b[i] + scale*diag[i]*x[i];
        }
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
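      /* with a zero initial guess the off-process contributions vanish, so the first sweep is purely local */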
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
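    /*
       Unlike the single-scatter scheme in MatSOR_MPISBAIJ, this variant communicates twice
       per iteration through Mvctx: a forward scatter brings in the ghost values of xx and a
       reverse ADD_VALUES scatter returns the (negated) B^T*xx contributions into bb1.
    */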
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*xx */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
         rows in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (Cannot be PETSC_DECIDE)
.  n - This value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
       calculated if N is given). For square matrices n is almost always m.
.  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based; the entries of i are offsets into the local j (and a)
     arrays marking where each local row starts.
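
       Example (an illustrative sketch with made-up data): with bs = 1 and two processes each
     owning two rows of a 4x4 symmetric tridiagonal matrix, the first process would pass only
     the diagonal and upper triangular entries of its rows
.vb
        PetscInt    i[] = {0,2,4};               /* each of the 2 local rows has 2 entries */
        PetscInt    j[] = {0,1,1,2};             /* global columns, upper triangle only    */
        PetscScalar a[] = {2.0,-1.0,2.0,-1.0};
        Mat         A;
        ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,4,4,i,j,a,&A);CHKERRQ(ierr);
.ve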

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (symmetric block compressed row storage).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

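   Notes:
       A typical (hypothetical) calling sequence, with bs = 1 and CSR arrays i, j, and v that
     describe the upper triangular part of the locally owned rows:
.vb
      ierr = MatCreate(comm,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
.ve
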
   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
