xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a3fa217bfefef3d54995bb82b2b5ea8a3c3e9019)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
25 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
32   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
38 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
39 {
40   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
41   PetscErrorCode ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
45   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
50   { \
51  \
52     brow = row/bs;  \
53     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
54     rmax = aimax[brow]; nrow = ailen[brow]; \
55     bcol = col/bs; \
56     ridx = row % bs; cidx = col % bs; \
57     low  = 0; high = nrow; \
58     while (high-low > 3) { \
59       t = (low+high)/2; \
60       if (rp[t] > bcol) high = t; \
61       else              low  = t; \
62     } \
63     for (_i=low; _i<high; _i++) { \
64       if (rp[_i] > bcol) break; \
65       if (rp[_i] == bcol) { \
66         bap = ap + bs2*_i + bs*cidx + ridx; \
67         if (addv == ADD_VALUES) *bap += value;  \
68         else                    *bap  = value;  \
69         goto a_noinsert; \
70       } \
71     } \
72     if (a->nonew == 1) goto a_noinsert; \
73     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
74     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
75     N = nrow++ - 1;  \
76     /* shift up all the later entries in this row */ \
77     for (ii=N; ii>=_i; ii--) { \
78       rp[ii+1] = rp[ii]; \
79       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
80     } \
81     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
82     rp[_i]                      = bcol;  \
83     ap[bs2*_i + bs*cidx + ridx] = value;  \
84     A->nonzerostate++;\
85 a_noinsert:; \
86     ailen[brow] = nrow; \
87   }
88 
89 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
90   { \
91     brow = row/bs;  \
92     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
93     rmax = bimax[brow]; nrow = bilen[brow]; \
94     bcol = col/bs; \
95     ridx = row % bs; cidx = col % bs; \
96     low  = 0; high = nrow; \
97     while (high-low > 3) { \
98       t = (low+high)/2; \
99       if (rp[t] > bcol) high = t; \
100       else              low  = t; \
101     } \
102     for (_i=low; _i<high; _i++) { \
103       if (rp[_i] > bcol) break; \
104       if (rp[_i] == bcol) { \
105         bap = ap + bs2*_i + bs*cidx + ridx; \
106         if (addv == ADD_VALUES) *bap += value;  \
107         else                    *bap  = value;  \
108         goto b_noinsert; \
109       } \
110     } \
111     if (b->nonew == 1) goto b_noinsert; \
112     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
113     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
114     N = nrow++ - 1;  \
115     /* shift up all the later entries in this row */ \
116     for (ii=N; ii>=_i; ii--) { \
117       rp[ii+1] = rp[ii]; \
118       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
119     } \
120     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
121     rp[_i]                      = bcol;  \
122     ap[bs2*_i + bs*cidx + ridx] = value;  \
123     B->nonzerostate++;\
124 b_noinsert:; \
125     bilen[brow] = nrow; \
126   }
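/*
   Both macros above use the same insertion scheme on a compressed block row:
     - compute the block indices brow = row/bs, bcol = col/bs and the offsets
       ridx = row%bs, cidx = col%bs within the bs x bs block;
     - narrow the search range over the row's block column indices rp[] with a
       short binary search, then scan linearly for bcol;
     - if the block is already present, add to or overwrite the entry at offset
       bs*cidx+ridx (blocks are stored column-major);
     - otherwise, unless nonew forbids new nonzeros, grow the storage if needed
       (MatSeqXAIJReallocateAIJ), shift the trailing blocks of the row up by one,
       zero the slot for the new block, and insert the new column index and value.
*/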
127 
128 /* Only add/insert a(i,j) with i<=j (blocks).
129    Any a(i,j) with i>j supplied by the user is ignored.
130 */
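/*
   Illustrative sketch (hypothetical values, not part of the implementation): with
   block size 1, only entries with row <= col are kept, e.g.

     PetscInt    row = 0, col = 1;
     PetscScalar val = 2.0;
     ierr = MatSetValues(mat,1,&row,1,&col,&val,INSERT_VALUES);CHKERRQ(ierr);   (upper triangle: stored)
     ierr = MatSetValues(mat,1,&col,1,&row,&val,INSERT_VALUES);CHKERRQ(ierr);   (lower triangle)

   The second call is dropped if MAT_IGNORE_LOWER_TRIANGULAR has been set (or
   -mat_ignore_lower_triangular is given); otherwise it raises an error.
*/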
131 #undef __FUNCT__
132 #define __FUNCT__ "MatSetValues_MPISBAIJ"
133 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
134 {
135   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
136   MatScalar      value;
137   PetscBool      roworiented = baij->roworiented;
138   PetscErrorCode ierr;
139   PetscInt       i,j,row,col;
140   PetscInt       rstart_orig=mat->rmap->rstart;
141   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
142   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
143 
144   /* Some Variables required in the macro */
145   Mat          A     = baij->A;
146   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
147   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
148   MatScalar    *aa   =a->a;
149 
150   Mat         B     = baij->B;
151   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
152   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
153   MatScalar   *ba   =b->a;
154 
155   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
156   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
157   MatScalar *ap,*bap;
158 
159   /* for stash */
160   PetscInt  n_loc, *in_loc = NULL;
161   MatScalar *v_loc = NULL;
162 
163   PetscFunctionBegin;
164   if (v) PetscValidScalarPointer(v,6);
165   if (!baij->donotstash) {
166     if (n > baij->n_loc) {
167       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
168       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
169       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
170       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
171 
172       baij->n_loc = n;
173     }
174     in_loc = baij->in_loc;
175     v_loc  = baij->v_loc;
176   }
177 
178   for (i=0; i<m; i++) {
179     if (im[i] < 0) continue;
180 #if defined(PETSC_USE_DEBUG)
181     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
182 #endif
183     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
184       row = im[i] - rstart_orig;              /* local row index */
185       for (j=0; j<n; j++) {
186         if (im[i]/bs > in[j]/bs) {
187           if (a->ignore_ltriangular) {
188             continue;    /* ignore lower triangular blocks */
189           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
190         }
191         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
192           col  = in[j] - cstart_orig;         /* local col index */
193           brow = row/bs; bcol = col/bs;
194           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
195           if (roworiented) value = v[i*n+j];
196           else             value = v[i+j*m];
197           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
198           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
199         } else if (in[j] < 0) continue;
200 #if defined(PETSC_USE_DEBUG)
201         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
202 #endif
203         else {  /* off-diag entry (B) */
204           if (mat->was_assembled) {
205             if (!baij->colmap) {
206               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
207             }
208 #if defined(PETSC_USE_CTABLE)
209             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
210             col  = col - 1;
211 #else
212             col = baij->colmap[in[j]/bs] - 1;
213 #endif
214             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
215               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
216               col  =  in[j];
217               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
218               B    = baij->B;
219               b    = (Mat_SeqBAIJ*)(B)->data;
220               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
221               ba   = b->a;
222             } else col += in[j]%bs;
223           } else col = in[j];
224           if (roworiented) value = v[i*n+j];
225           else             value = v[i+j*m];
226           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         }
229       }
230     } else {  /* off processor entry */
231       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
232       if (!baij->donotstash) {
233         mat->assembled = PETSC_FALSE;
234         n_loc          = 0;
235         for (j=0; j<n; j++) {
236           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
237           in_loc[n_loc] = in[j];
238           if (roworiented) {
239             v_loc[n_loc] = v[i*n+j];
240           } else {
241             v_loc[n_loc] = v[j*m+i];
242           }
243           n_loc++;
244         }
245         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
246       }
247     }
248   }
249   PetscFunctionReturn(0);
250 }
251 
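/*
   MatSetValuesBlocked_MPISBAIJ() takes whole bs x bs blocks. Blocks below the block
   diagonal (im[i] > in[j]) are ignored or flagged as an error; the remaining blocks
   are copied into the scratch array baij->barray (or used in place when no copy is
   needed) and routed to the diagonal part A or the off-diagonal part B, while blocks
   for rows owned by other processes are placed in the block stash mat->bstash.
*/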
252 #undef __FUNCT__
253 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
254 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
255 {
256   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
257   const MatScalar *value;
258   MatScalar       *barray     =baij->barray;
259   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
260   PetscErrorCode  ierr;
261   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
262   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
263   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
264 
265   PetscFunctionBegin;
266   if (!barray) {
267     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
268     baij->barray = barray;
269   }
270 
271   if (roworiented) {
272     stepval = (n-1)*bs;
273   } else {
274     stepval = (m-1)*bs;
275   }
276   for (i=0; i<m; i++) {
277     if (im[i] < 0) continue;
278 #if defined(PETSC_USE_DEBUG)
279     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
280 #endif
281     if (im[i] >= rstart && im[i] < rend) {
282       row = im[i] - rstart;
283       for (j=0; j<n; j++) {
284         if (im[i] > in[j]) {
285           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
286           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
287         }
288         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the input block can be used in place; no copy is required */
289         if ((roworiented) && (n == 1)) {
290           barray = (MatScalar*) v + i*bs2;
291         } else if ((!roworiented) && (m == 1)) {
292           barray = (MatScalar*) v + j*bs2;
293         } else { /* Here a copy is required */
294           if (roworiented) {
295             value = v + i*(stepval+bs)*bs + j*bs;
296           } else {
297             value = v + j*(stepval+bs)*bs + i*bs;
298           }
299           for (ii=0; ii<bs; ii++,value+=stepval) {
300             for (jj=0; jj<bs; jj++) {
301               *barray++ = *value++;
302             }
303           }
304           barray -=bs2;
305         }
306 
307         if (in[j] >= cstart && in[j] < cend) {
308           col  = in[j] - cstart;
309           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
310         } else if (in[j] < 0) continue;
311 #if defined(PETSC_USE_DEBUG)
312         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
313 #endif
314         else {
315           if (mat->was_assembled) {
316             if (!baij->colmap) {
317               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
318             }
319 
320 #if defined(PETSC_USE_DEBUG)
321 #if defined(PETSC_USE_CTABLE)
322             { PetscInt data;
323               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
324               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
325             }
326 #else
327             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328 #endif
329 #endif
330 #if defined(PETSC_USE_CTABLE)
331             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
332             col  = (col - 1)/bs;
333 #else
334             col = (baij->colmap[in[j]] - 1)/bs;
335 #endif
336             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
337               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
338               col  = in[j];
339             }
340           } else col = in[j];
341           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
342         }
343       }
344     } else {
345       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
346       if (!baij->donotstash) {
347         if (roworiented) {
348           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
349         } else {
350           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         }
352       }
353     }
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNCT__
359 #define __FUNCT__ "MatGetValues_MPISBAIJ"
360 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
361 {
362   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
365   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
366 
367   PetscFunctionBegin;
368   for (i=0; i<m; i++) {
369     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
370     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
371     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
372       row = idxm[i] - bsrstart;
373       for (j=0; j<n; j++) {
374         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
375         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
376         if (idxn[j] >= bscstart && idxn[j] < bscend) {
377           col  = idxn[j] - bscstart;
378           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
379         } else {
380           if (!baij->colmap) {
381             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
382           }
383 #if defined(PETSC_USE_CTABLE)
384           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
385           data--;
386 #else
387           data = baij->colmap[idxn[j]/bs]-1;
388 #endif
389           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
390           else {
391             col  = data + idxn[j]%bs;
392             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
393           }
394         }
395       }
396     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
397   }
398   PetscFunctionReturn(0);
399 }
400 
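/*
   Norms for the symmetric format are assembled from the two local parts:
   - NORM_FROBENIUS: with f_A = ||A_loc||_F and f_B = ||B_loc||_F, the result is
       ||M||_F = sqrt( sum_over_ranks f_A^2 + 2 * sum_over_ranks f_B^2 ),
     the factor 2 accounting for the transposed copy of each stored off-diagonal
     block entry that is not stored explicitly.
   - NORM_1 / NORM_INFINITY (identical for a symmetric matrix): each stored entry
     contributes to the sum of its own column and, for entries outside the diagonal
     block position, to the sum of its row; the per-column sums are combined with
     MPI_Allreduce before taking the maximum.
*/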
401 #undef __FUNCT__
402 #define __FUNCT__ "MatNorm_MPISBAIJ"
403 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
404 {
405   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
406   PetscErrorCode ierr;
407   PetscReal      sum[2],*lnorm2;
408 
409   PetscFunctionBegin;
410   if (baij->size == 1) {
411     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
412   } else {
413     if (type == NORM_FROBENIUS) {
414       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
415       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
416       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
417       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
418       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
419       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
420       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
421       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
422     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
423       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
424       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
425       PetscReal    *rsum,*rsum2,vabs;
426       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
427       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
428       MatScalar    *v;
429 
430       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
431       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
432       /* Amat */
433       v = amat->a; jj = amat->j;
434       for (brow=0; brow<mbs; brow++) {
435         grow = bs*(rstart + brow);
436         nz   = amat->i[brow+1] - amat->i[brow];
437         for (bcol=0; bcol<nz; bcol++) {
438           gcol = bs*(rstart + *jj); jj++;
439           for (col=0; col<bs; col++) {
440             for (row=0; row<bs; row++) {
441               vabs            = PetscAbsScalar(*v); v++;
442               rsum[gcol+col] += vabs;
443               /* off-diagonal block: also contributes to the row sum (symmetric entry) */
444               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
445             }
446           }
447         }
448       }
449       /* Bmat */
450       v = bmat->a; jj = bmat->j;
451       for (brow=0; brow<mbs; brow++) {
452         grow = bs*(rstart + brow);
453         nz = bmat->i[brow+1] - bmat->i[brow];
454         for (bcol=0; bcol<nz; bcol++) {
455           gcol = bs*garray[*jj]; jj++;
456           for (col=0; col<bs; col++) {
457             for (row=0; row<bs; row++) {
458               vabs            = PetscAbsScalar(*v); v++;
459               rsum[gcol+col] += vabs;
460               rsum[grow+row] += vabs;
461             }
462           }
463         }
464       }
465       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
466       *norm = 0.0;
467       for (col=0; col<mat->cmap->N; col++) {
468         if (rsum2[col] > *norm) *norm = rsum2[col];
469       }
470       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
471     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
472   }
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
478 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
479 {
480   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
481   PetscErrorCode ierr;
482   PetscInt       nstash,reallocs;
483   InsertMode     addv;
484 
485   PetscFunctionBegin;
486   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
487 
488   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
489   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
490   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
491   mat->insertmode = addv; /* in case this processor had no cache */
492 
493   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
494   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
495   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
496   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
497   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
498   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
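/*
   MatAssemblyEnd first drains the scalar stash and the block stash that were filled
   with off-process entries (the block stash is column-oriented, so the roworiented
   flags are temporarily cleared), then assembles the local A and B parts, lets every
   process disassemble again if any process had to, and on the first final assembly
   builds the scatter contexts (Mvctx/sMvctx) used by the matrix-vector products.
*/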
502 #undef __FUNCT__
503 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
504 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
505 {
506   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
507   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
508   PetscErrorCode ierr;
509   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
510   PetscInt       *row,*col;
511   PetscBool      other_disassembled;
512   PetscMPIInt    n;
513   PetscBool      r1,r2,r3;
514   MatScalar      *val;
515   InsertMode     addv = mat->insertmode;
516 
517   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
518   PetscFunctionBegin;
519   if (!baij->donotstash &&  !mat->nooffprocentries) {
520     while (1) {
521       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
522       if (!flg) break;
523 
524       for (i=0; i<n;) {
525         /* Now identify the consecutive vals belonging to the same row */
526         for (j=i,rstart=row[j]; j<n; j++) {
527           if (row[j] != rstart) break;
528         }
529         if (j < n) ncols = j-i;
530         else       ncols = n-i;
531         /* Now assemble all these values with a single function call */
532         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
533         i    = j;
534       }
535     }
536     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
537     /* Now process the block-stash. Since the values are stashed column-oriented,
538        clear the roworiented flags, and restore the original flags after
539        MatSetValuesBlocked() has been called */
540     r1 = baij->roworiented;
541     r2 = a->roworiented;
542     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
543 
544     baij->roworiented = PETSC_FALSE;
545     a->roworiented    = PETSC_FALSE;
546 
547     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
548     while (1) {
549       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
550       if (!flg) break;
551 
552       for (i=0; i<n;) {
553         /* Now identify the consecutive vals belonging to the same row */
554         for (j=i,rstart=row[j]; j<n; j++) {
555           if (row[j] != rstart) break;
556         }
557         if (j < n) ncols = j-i;
558         else       ncols = n-i;
559         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
560         i    = j;
561       }
562     }
563     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
564 
565     baij->roworiented = r1;
566     a->roworiented    = r2;
567 
568     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
569   }
570 
571   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
572   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
573 
574   /* determine if any processor has disassembled; if so we must
575      also disassemble ourselves so that we can reassemble. */
576   /*
577      if nonzero structure of submatrix B cannot change then we know that
578      no processor disassembled thus we can skip this stuff
579   */
580   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
581     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
582     if (mat->was_assembled && !other_disassembled) {
583       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
584     }
585   }
586 
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
589   }
590   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
591   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
592 
593   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
594 
595   baij->rowvalues = 0;
596   {
597     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
598     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
599   }
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
604 #include <petscdraw.h>
605 #undef __FUNCT__
606 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
607 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
608 {
609   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
610   PetscErrorCode    ierr;
611   PetscInt          bs   = mat->rmap->bs;
612   PetscMPIInt       size = baij->size,rank = baij->rank;
613   PetscBool         iascii,isdraw;
614   PetscViewer       sviewer;
615   PetscViewerFormat format;
616 
617   PetscFunctionBegin;
618   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
619   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
620   if (iascii) {
621     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
622     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
623       MatInfo info;
624       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
625       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
632       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
633       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
634       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
635       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_INFO) {
638       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
639       PetscFunctionReturn(0);
640     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
641       PetscFunctionReturn(0);
642     }
643   }
644 
645   if (isdraw) {
646     PetscDraw draw;
647     PetscBool isnull;
648     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
649     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
650   }
651 
652   if (size == 1) {
653     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
654     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
655   } else {
656     /* assemble the entire matrix onto first processor. */
657     Mat          A;
658     Mat_SeqSBAIJ *Aloc;
659     Mat_SeqBAIJ  *Bloc;
660     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
661     MatScalar    *a;
662 
663     /* Should this be the same type as mat? */
664     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
665     if (!rank) {
666       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
667     } else {
668       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
669     }
670     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
671     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
672     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
673     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
674 
675     /* copy over the A part */
676     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
677     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
678     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
679 
680     for (i=0; i<mbs; i++) {
681       rvals[0] = bs*(baij->rstartbs + i);
682       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
683       for (j=ai[i]; j<ai[i+1]; j++) {
684         col = (baij->cstartbs+aj[j])*bs;
685         for (k=0; k<bs; k++) {
686           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
687           col++;
688           a += bs;
689         }
690       }
691     }
692     /* copy over the B part */
693     Bloc = (Mat_SeqBAIJ*)baij->B->data;
694     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
695     for (i=0; i<mbs; i++) {
696 
697       rvals[0] = bs*(baij->rstartbs + i);
698       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
699       for (j=ai[i]; j<ai[i+1]; j++) {
700         col = baij->garray[aj[j]]*bs;
701         for (k=0; k<bs; k++) {
702           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
703           col++;
704           a += bs;
705         }
706       }
707     }
708     ierr = PetscFree(rvals);CHKERRQ(ierr);
709     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
710     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
711     /*
712        Every process has to participate in drawing the matrix since the graphics waits are
713        synchronized across all processors that share the PetscDraw object
714     */
715     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
716     if (!rank) {
717       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
718       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
719       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
720       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
721     }
722     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
723     ierr = MatDestroy(&A);CHKERRQ(ierr);
724   }
725   PetscFunctionReturn(0);
726 }
727 
728 #undef __FUNCT__
729 #define __FUNCT__ "MatView_MPISBAIJ"
730 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
731 {
732   PetscErrorCode ierr;
733   PetscBool      iascii,isdraw,issocket,isbinary;
734 
735   PetscFunctionBegin;
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
737   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
740   if (iascii || isdraw || issocket || isbinary) {
741     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
742   }
743   PetscFunctionReturn(0);
744 }
745 
746 #undef __FUNCT__
747 #define __FUNCT__ "MatDestroy_MPISBAIJ"
748 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
749 {
750   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
751   PetscErrorCode ierr;
752 
753   PetscFunctionBegin;
754 #if defined(PETSC_USE_LOG)
755   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
756 #endif
757   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
758   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
760   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
761 #if defined(PETSC_USE_CTABLE)
762   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
763 #else
764   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
765 #endif
766   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
768   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
774   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
775   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
776   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
777   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
781 #if defined(PETSC_USE_REAL_MAT_SINGLE)
782   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
783 #endif
784   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
787   ierr = PetscFree(mat->data);CHKERRQ(ierr);
788 
789   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
795   PetscFunctionReturn(0);
796 }
797 
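/*
   Matrix-vector product with only the upper triangle stored: on each process
     y = A*x + B*x_ghost + (B^T*x scattered back to the owning processes)
   The routines below realize this with a single VecScatter: slvec0 packs
   [x ; B^T*x] (B^H*x in the Hermitian case), the scatter with ADD_VALUES both
   accumulates the transposed contributions into slvec1a and gathers the ghost
   values into slvec1b, and the result is obtained as y = B*slvec1b + slvec1a,
   where slvec1a was preloaded with A*x (plus y for the MultAdd variant).
*/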
798 #undef __FUNCT__
799 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
800 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
801 {
802   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
803   PetscErrorCode ierr;
804   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
805   PetscScalar    *x,*from;
806 
807   PetscFunctionBegin;
808   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
809   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
810 
811   /* diagonal part */
812   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
813   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
814 
815   /* subdiagonal part */
816   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
817 
818   /* copy x into the vec slvec0 */
819   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
823   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
824   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
825 
826   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
828   /* superdiagonal part */
829   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
830   PetscFunctionReturn(0);
831 }
832 
833 #undef __FUNCT__
834 #define __FUNCT__ "MatMult_MPISBAIJ"
835 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
836 {
837   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
838   PetscErrorCode ierr;
839   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
840   PetscScalar    *x,*from;
841 
842   PetscFunctionBegin;
843   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
844   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
845 
846   /* diagonal part */
847   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
848   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
849 
850   /* subdiagonal part */
851   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
852 
853   /* copy x into the vec slvec0 */
854   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
858   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
859   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
860 
861   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
863   /* superdiagonal part */
864   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
865   PetscFunctionReturn(0);
866 }
867 
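/*
   The _2comm variants use two separate communications instead of the combined
   scatter above: a forward scatter of x provides the ghost values needed for the
   superdiagonal product B*lvec, and a reverse scatter with ADD_VALUES returns the
   subdiagonal contributions B^T*x to their owning processes.
*/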
868 #undef __FUNCT__
869 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
870 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
871 {
872   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
873   PetscErrorCode ierr;
874   PetscInt       nt;
875 
876   PetscFunctionBegin;
877   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
878   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
879 
880   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
881   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
882 
883   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   /* do diagonal part */
885   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
886   /* do superdiagonal part */
887   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
888   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
889   /* do subdiagonal part */
890   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
891   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* superdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
934 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
935 {
936   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
937   PetscErrorCode ierr;
938 
939   PetscFunctionBegin;
940   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
941   /* do diagonal part */
942   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
943   /* do superdiagonal part */
944   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
945   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
946 
947   /* do subdiagonal part */
948   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
949   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
950   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 /*
955   This only works correctly for square matrices where the subblock A->A is the
956    diagonal block
957 */
958 #undef __FUNCT__
959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
961 {
962   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
967   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatScale_MPISBAIJ"
973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
974 {
975   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
980   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
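/*
   MatGetRow returns one locally owned row assembled from the A and B parts: the
   sequential getrow routines supply the two pieces, B's block column indices are
   mapped back to global indices through baij->garray, and the pieces are merged so
   that the returned column indices are increasing (B entries left of the diagonal
   block, then A, then the remaining B entries).
*/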
984 #undef __FUNCT__
985 #define __FUNCT__ "MatGetRow_MPISBAIJ"
986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
987 {
988   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
989   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
990   PetscErrorCode ierr;
991   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
992   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
993   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
994 
995   PetscFunctionBegin;
996   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
997   mat->getrowactive = PETSC_TRUE;
998 
999   if (!mat->rowvalues && (idx || v)) {
1000     /*
1001         allocate enough space to hold information from the longest row.
1002     */
1003     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1004     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1005     PetscInt     max = 1,mbs = mat->mbs,tmp;
1006     for (i=0; i<mbs; i++) {
1007       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1008       if (max < tmp) max = tmp;
1009     }
1010     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1011   }
1012 
1013   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1014   lrow = row - brstart;  /* local row index */
1015 
1016   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1017   if (!v)   {pvA = 0; pvB = 0;}
1018   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1019   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1020   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1021   nztot = nzA + nzB;
1022 
1023   cmap = mat->garray;
1024   if (v  || idx) {
1025     if (nztot) {
1026       /* Sort by increasing column numbers, assuming A and B already sorted */
1027       PetscInt imark = -1;
1028       if (v) {
1029         *v = v_p = mat->rowvalues;
1030         for (i=0; i<nzB; i++) {
1031           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1032           else break;
1033         }
1034         imark = i;
1035         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1036         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1037       }
1038       if (idx) {
1039         *idx = idx_p = mat->rowindices;
1040         if (imark > -1) {
1041           for (i=0; i<imark; i++) {
1042             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1043           }
1044         } else {
1045           for (i=0; i<nzB; i++) {
1046             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1047             else break;
1048           }
1049           imark = i;
1050         }
1051         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1052         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1053       }
1054     } else {
1055       if (idx) *idx = 0;
1056       if (v)   *v   = 0;
1057     }
1058   }
1059   *nz  = nztot;
1060   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1061   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1067 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1068 {
1069   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1070 
1071   PetscFunctionBegin;
1072   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1073   baij->getrowactive = PETSC_FALSE;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1079 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1080 {
1081   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1082   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1083 
1084   PetscFunctionBegin;
1085   aA->getrow_utriangular = PETSC_TRUE;
1086   PetscFunctionReturn(0);
1087 }
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1090 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1091 {
1092   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1093   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1094 
1095   PetscFunctionBegin;
1096   aA->getrow_utriangular = PETSC_FALSE;
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1102 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1103 {
1104   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1109   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1115 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1116 {
1117   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1122   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1128 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1129 {
1130   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1135   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1141 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1142 {
1143   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1144   Mat            A  = a->A,B = a->B;
1145   PetscErrorCode ierr;
1146   PetscReal      isend[5],irecv[5];
1147 
1148   PetscFunctionBegin;
1149   info->block_size = (PetscReal)matin->rmap->bs;
1150 
1151   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1152 
1153   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1154   isend[3] = info->memory;  isend[4] = info->mallocs;
1155 
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157 
1158   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1159   isend[3] += info->memory;  isend[4] += info->mallocs;
1160   if (flag == MAT_LOCAL) {
1161     info->nz_used      = isend[0];
1162     info->nz_allocated = isend[1];
1163     info->nz_unneeded  = isend[2];
1164     info->memory       = isend[3];
1165     info->mallocs      = isend[4];
1166   } else if (flag == MAT_GLOBAL_MAX) {
1167     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1168 
1169     info->nz_used      = irecv[0];
1170     info->nz_allocated = irecv[1];
1171     info->nz_unneeded  = irecv[2];
1172     info->memory       = irecv[3];
1173     info->mallocs      = irecv[4];
1174   } else if (flag == MAT_GLOBAL_SUM) {
1175     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1176 
1177     info->nz_used      = irecv[0];
1178     info->nz_allocated = irecv[1];
1179     info->nz_unneeded  = irecv[2];
1180     info->memory       = irecv[3];
1181     info->mallocs      = irecv[4];
1182   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1183   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1184   info->fill_ratio_needed = 0;
1185   info->factor_mallocs    = 0;
1186   PetscFunctionReturn(0);
1187 }
1188 
1189 #undef __FUNCT__
1190 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1191 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1192 {
1193   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1194   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1195   PetscErrorCode ierr;
1196 
1197   PetscFunctionBegin;
1198   switch (op) {
1199   case MAT_NEW_NONZERO_LOCATIONS:
1200   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1201   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1202   case MAT_KEEP_NONZERO_PATTERN:
1203   case MAT_NEW_NONZERO_LOCATION_ERR:
1204     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1205     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1206     break;
1207   case MAT_ROW_ORIENTED:
1208     a->roworiented = flg;
1209 
1210     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1211     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1212     break;
1213   case MAT_NEW_DIAGONALS:
1214     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1215     break;
1216   case MAT_IGNORE_OFF_PROC_ENTRIES:
1217     a->donotstash = flg;
1218     break;
1219   case MAT_USE_HASH_TABLE:
1220     a->ht_flag = flg;
1221     break;
1222   case MAT_HERMITIAN:
1223     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1224     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1225 
1226     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1227     break;
1228   case MAT_SPD:
1229     A->spd_set = PETSC_TRUE;
1230     A->spd     = flg;
1231     if (flg) {
1232       A->symmetric                  = PETSC_TRUE;
1233       A->structurally_symmetric     = PETSC_TRUE;
1234       A->symmetric_set              = PETSC_TRUE;
1235       A->structurally_symmetric_set = PETSC_TRUE;
1236     }
1237     break;
1238   case MAT_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_STRUCTURALLY_SYMMETRIC:
1242     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1243     break;
1244   case MAT_SYMMETRY_ETERNAL:
1245     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1246     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1247     break;
1248   case MAT_IGNORE_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_ERROR_LOWER_TRIANGULAR:
1252     aA->ignore_ltriangular = flg;
1253     break;
1254   case MAT_GETROW_UPPERTRIANGULAR:
1255     aA->getrow_utriangular = flg;
1256     break;
1257   default:
1258     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1259   }
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1265 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1266 {
1267   PetscErrorCode ierr;
1268 
1269   PetscFunctionBegin;
1270   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1271     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
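/*
   Diagonal scaling of a symmetric matrix must be two-sided with the same vector,
   M <- D M D with D = diag(ll): the routine therefore insists on ll == rr (or
   equal contents), scales the diagonal part on both sides, and applies ll to the
   rows and the scattered ghost copy of ll to the columns of the off-diagonal part.
*/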
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1278 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1279 {
1280   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1281   Mat            a     = baij->A, b=baij->B;
1282   PetscErrorCode ierr;
1283   PetscInt       nv,m,n;
1284   PetscBool      flg;
1285 
1286   PetscFunctionBegin;
1287   if (ll != rr) {
1288     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1289     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1290   }
1291   if (!ll) PetscFunctionReturn(0);
1292 
1293   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1294   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1295 
1296   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1297   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1298 
1299   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1300 
1301   /* left diagonalscale the off-diagonal part */
1302   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1303 
1304   /* scale the diagonal part */
1305   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1306 
1307   /* right diagonalscale the off-diagonal part */
1308   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1309   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1315 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1316 {
1317   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1318   PetscErrorCode ierr;
1319 
1320   PetscFunctionBegin;
1321   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1322   PetscFunctionReturn(0);
1323 }
1324 
1325 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1326 
1327 #undef __FUNCT__
1328 #define __FUNCT__ "MatEqual_MPISBAIJ"
1329 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1330 {
1331   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1332   Mat            a,b,c,d;
1333   PetscBool      flg;
1334   PetscErrorCode ierr;
1335 
1336   PetscFunctionBegin;
1337   a = matA->A; b = matA->B;
1338   c = matB->A; d = matB->B;
1339 
1340   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1341   if (flg) {
1342     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1343   }
1344   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1345   PetscFunctionReturn(0);
1346 }
1347 
1348 #undef __FUNCT__
1349 #define __FUNCT__ "MatCopy_MPISBAIJ"
1350 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1351 {
1352   PetscErrorCode ierr;
1353   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1354   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1355 
1356   PetscFunctionBegin;
1357   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1358   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1359     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1360     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1361     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1362   } else {
1363     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1364     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1365   }
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1371 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1372 {
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1382 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1383 {
1384   PetscErrorCode ierr;
1385   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1386   PetscBLASInt   bnz,one=1;
1387   Mat_SeqSBAIJ   *xa,*ya;
1388   Mat_SeqBAIJ    *xb,*yb;
1389 
1390   PetscFunctionBegin;
1391   if (str == SAME_NONZERO_PATTERN) {
1392     PetscScalar alpha = a;
1393     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1394     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1395     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1396     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1397     xb   = (Mat_SeqBAIJ*)xx->B->data;
1398     yb   = (Mat_SeqBAIJ*)yy->B->data;
1399     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1400     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1401     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1402   } else {
1403     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1404     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1405     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1406   }
1407   PetscFunctionReturn(0);
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1412 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1413 {
1414   PetscErrorCode ierr;
1415   PetscInt       i;
1416   PetscBool      flg;
1417 
1418   PetscFunctionBegin;
1419   for (i=0; i<n; i++) {
1420     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1421     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1422   }
1423   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1424   PetscFunctionReturn(0);
1425 }
1426 
1427 
1428 /* -------------------------------------------------------------------*/
1429 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1430                                        MatGetRow_MPISBAIJ,
1431                                        MatRestoreRow_MPISBAIJ,
1432                                        MatMult_MPISBAIJ,
1433                                /*  4*/ MatMultAdd_MPISBAIJ,
1434                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1435                                        MatMultAdd_MPISBAIJ,
1436                                        0,
1437                                        0,
1438                                        0,
1439                                /* 10*/ 0,
1440                                        0,
1441                                        0,
1442                                        MatSOR_MPISBAIJ,
1443                                        MatTranspose_MPISBAIJ,
1444                                /* 15*/ MatGetInfo_MPISBAIJ,
1445                                        MatEqual_MPISBAIJ,
1446                                        MatGetDiagonal_MPISBAIJ,
1447                                        MatDiagonalScale_MPISBAIJ,
1448                                        MatNorm_MPISBAIJ,
1449                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1450                                        MatAssemblyEnd_MPISBAIJ,
1451                                        MatSetOption_MPISBAIJ,
1452                                        MatZeroEntries_MPISBAIJ,
1453                                /* 24*/ 0,
1454                                        0,
1455                                        0,
1456                                        0,
1457                                        0,
1458                                /* 29*/ MatSetUp_MPISBAIJ,
1459                                        0,
1460                                        0,
1461                                        0,
1462                                        0,
1463                                /* 34*/ MatDuplicate_MPISBAIJ,
1464                                        0,
1465                                        0,
1466                                        0,
1467                                        0,
1468                                /* 39*/ MatAXPY_MPISBAIJ,
1469                                        MatGetSubMatrices_MPISBAIJ,
1470                                        MatIncreaseOverlap_MPISBAIJ,
1471                                        MatGetValues_MPISBAIJ,
1472                                        MatCopy_MPISBAIJ,
1473                                /* 44*/ 0,
1474                                        MatScale_MPISBAIJ,
1475                                        0,
1476                                        0,
1477                                        0,
1478                                /* 49*/ 0,
1479                                        0,
1480                                        0,
1481                                        0,
1482                                        0,
1483                                /* 54*/ 0,
1484                                        0,
1485                                        MatSetUnfactored_MPISBAIJ,
1486                                        0,
1487                                        MatSetValuesBlocked_MPISBAIJ,
1488                                /* 59*/ 0,
1489                                        0,
1490                                        0,
1491                                        0,
1492                                        0,
1493                                /* 64*/ 0,
1494                                        0,
1495                                        0,
1496                                        0,
1497                                        0,
1498                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1499                                        0,
1500                                        0,
1501                                        0,
1502                                        0,
1503                                /* 74*/ 0,
1504                                        0,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                /* 79*/ 0,
1509                                        0,
1510                                        0,
1511                                        0,
1512                                        MatLoad_MPISBAIJ,
1513                                /* 84*/ 0,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                        0,
1518                                /* 89*/ 0,
1519                                        0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                /* 94*/ 0,
1524                                        0,
1525                                        0,
1526                                        0,
1527                                        0,
1528                                /* 99*/ 0,
1529                                        0,
1530                                        0,
1531                                        0,
1532                                        0,
1533                                /*104*/ 0,
1534                                        MatRealPart_MPISBAIJ,
1535                                        MatImaginaryPart_MPISBAIJ,
1536                                        MatGetRowUpperTriangular_MPISBAIJ,
1537                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1538                                /*109*/ 0,
1539                                        0,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                /*114*/ 0,
1544                                        0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                /*119*/ 0,
1549                                        0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                /*124*/ 0,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                /*129*/ 0,
1559                                        0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                /*134*/ 0,
1564                                        0,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                /*139*/ 0,
1569                                        0,
1570                                        0
1571 };
1572 
1573 
1574 #undef __FUNCT__
1575 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1576 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1577 {
1578   PetscFunctionBegin;
1579   *a = ((Mat_MPISBAIJ*)A->data)->A;
1580   PetscFunctionReturn(0);
1581 }
1582 
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1585 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1586 {
1587   Mat_MPISBAIJ   *b;
1588   PetscErrorCode ierr;
1589   PetscInt       i,mbs,Mbs;
1590 
1591   PetscFunctionBegin;
1592   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1593   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1594   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1595   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1596 
1597   b   = (Mat_MPISBAIJ*)B->data;
1598   mbs = B->rmap->n/bs;
1599   Mbs = B->rmap->N/bs;
1600   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1601 
1602   B->rmap->bs = bs;
1603   b->bs2      = bs*bs;
1604   b->mbs      = mbs;
1605   b->nbs      = mbs;
1606   b->Mbs      = Mbs;
1607   b->Nbs      = Mbs;
1608 
1609   for (i=0; i<=b->size; i++) {
1610     b->rangebs[i] = B->rmap->range[i]/bs;
1611   }
1612   b->rstartbs = B->rmap->rstart/bs;
1613   b->rendbs   = B->rmap->rend/bs;
1614 
1615   b->cstartbs = B->cmap->rstart/bs;
1616   b->cendbs   = B->cmap->rend/bs;
1617 
1618   if (!B->preallocated) {
1619     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1620     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1621     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1622     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1623     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1624     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1625     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1626     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1627     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1628   }
1629 
1630   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1631   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1632 
1633   B->preallocated = PETSC_TRUE;
1634   PetscFunctionReturn(0);
1635 }
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1639 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1640 {
1641   PetscInt       m,rstart,cstart,cend;
1642   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1643   const PetscInt *JJ    =0;
1644   PetscScalar    *values=0;
1645   PetscErrorCode ierr;
1646 
1647   PetscFunctionBegin;
1648   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1649   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1650   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1651   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1652   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1653   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1654   m      = B->rmap->n/bs;
1655   rstart = B->rmap->rstart/bs;
1656   cstart = B->cmap->rstart/bs;
1657   cend   = B->cmap->rend/bs;
1658 
1659   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1660   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1661   for (i=0; i<m; i++) {
1662     nz = ii[i+1] - ii[i];
1663     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1664     nz_max = PetscMax(nz_max,nz);
1665     JJ     = jj + ii[i];
1666     for (j=0; j<nz; j++) {
1667       if (*JJ >= cstart) break;
1668       JJ++;
1669     }
1670     d = 0;
1671     for (; j<nz; j++) {
1672       if (*JJ++ >= cend) break;
1673       d++;
1674     }
1675     d_nnz[i] = d;
1676     o_nnz[i] = nz - d;
1677   }
1678   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1679   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1680 
1681   values = (PetscScalar*)V;
1682   if (!values) {
1683     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1684     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1685   }
1686   for (i=0; i<m; i++) {
1687     PetscInt          row    = i + rstart;
1688     PetscInt          ncols  = ii[i+1] - ii[i];
1689     const PetscInt    *icols = jj + ii[i];
1690     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1691     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1692   }
1693 
1694   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1695   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1698   PetscFunctionReturn(0);
1699 }
1700 
1701 #if defined(PETSC_HAVE_MUMPS)
1702 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1703 #endif
1704 #if defined(PETSC_HAVE_PASTIX)
1705 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1706 #endif
1707 
1708 /*MC
1709    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1710    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1711    the matrix is stored.
1712 
1713   For complex numbers this matrix is by default symmetric, NOT Hermitian. To make it Hermitian you
1714   can call MatSetOption(Mat, MAT_HERMITIAN, PETSC_TRUE).
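
  For example (a minimal sketch; A is assumed to be an already assembled matrix of this type):
.vb
  ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
.ve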
1715 
1716    Options Database Keys:
1717 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1718 
1719   Level: beginner
1720 
1721 .seealso: MatCreateMPISBAIJ
1722 M*/
1723 
1724 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatCreate_MPISBAIJ"
1728 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1729 {
1730   Mat_MPISBAIJ   *b;
1731   PetscErrorCode ierr;
1732   PetscBool      flg;
1733 
1734   PetscFunctionBegin;
1735   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1736   B->data = (void*)b;
1737   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1738 
1739   B->ops->destroy = MatDestroy_MPISBAIJ;
1740   B->ops->view    = MatView_MPISBAIJ;
1741   B->assembled    = PETSC_FALSE;
1742   B->insertmode   = NOT_SET_VALUES;
1743 
1744   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1745   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1746 
1747   /* build local table of row and column ownerships */
1748   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1749 
1750   /* build cache for off array entries formed */
1751   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1752 
1753   b->donotstash  = PETSC_FALSE;
1754   b->colmap      = NULL;
1755   b->garray      = NULL;
1756   b->roworiented = PETSC_TRUE;
1757 
1758   /* stuff used in block assembly */
1759   b->barray = 0;
1760 
1761   /* stuff used for matrix vector multiply */
1762   b->lvec    = 0;
1763   b->Mvctx   = 0;
1764   b->slvec0  = 0;
1765   b->slvec0b = 0;
1766   b->slvec1  = 0;
1767   b->slvec1a = 0;
1768   b->slvec1b = 0;
1769   b->sMvctx  = 0;
1770 
1771   /* stuff for MatGetRow() */
1772   b->rowindices   = 0;
1773   b->rowvalues    = 0;
1774   b->getrowactive = PETSC_FALSE;
1775 
1776   /* hash table stuff */
1777   b->ht           = 0;
1778   b->hd           = 0;
1779   b->ht_size      = 0;
1780   b->ht_flag      = PETSC_FALSE;
1781   b->ht_fact      = 0;
1782   b->ht_total_ct  = 0;
1783   b->ht_insert_ct = 0;
1784 
1785   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1786   b->ijonly = PETSC_FALSE;
1787 
1788   b->in_loc = 0;
1789   b->v_loc  = 0;
1790   b->n_loc  = 0;
1791   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1792   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1793   if (flg) {
1794     PetscReal fact = 1.39;
1795     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1796     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1797     if (fact <= 1.0) fact = 1.39;
1798     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1799     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1800   }
1801   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1802 
1803 #if defined(PETSC_HAVE_PASTIX)
1804   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1805 #endif
1806 #if defined(PETSC_HAVE_MUMPS)
1807   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1808 #endif
1809   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1810   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1815 
1816   B->symmetric                  = PETSC_TRUE;
1817   B->structurally_symmetric     = PETSC_TRUE;
1818   B->symmetric_set              = PETSC_TRUE;
1819   B->structurally_symmetric_set = PETSC_TRUE;
1820 
1821   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1822   PetscFunctionReturn(0);
1823 }
1824 
1825 /*MC
1826    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1827 
1828    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1829    and MATMPISBAIJ otherwise.
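
   A typical creation sequence (a sketch only; comm, A, the global size M, the block size 1, and the
   per-block-row estimates 5 and 2 are illustrative placeholders) is
.vb
  ierr = MatCreate(comm,&A);CHKERRQ(ierr);
  ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
  ierr = MatSetFromOptions(A);CHKERRQ(ierr);
  ierr = MatSeqSBAIJSetPreallocation(A,1,5,NULL);CHKERRQ(ierr);        /* takes effect on one process */
  ierr = MatMPISBAIJSetPreallocation(A,1,5,NULL,2,NULL);CHKERRQ(ierr); /* takes effect on more than one process */
.ve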
1830 
1831    Options Database Keys:
1832 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1833 
1834   Level: beginner
1835 
1836 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1837 M*/
1838 
1839 #undef __FUNCT__
1840 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1841 /*@C
1842    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1843    the user should preallocate the matrix storage by setting the parameters
1844    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1845    performance can be increased by more than a factor of 50.
1846 
1847    Collective on Mat
1848 
1849    Input Parameters:
1850 +  A - the matrix
1851 .  bs   - size of block
1852 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1853            submatrix  (same for all local rows)
1854 .  d_nnz - array containing the number of block nonzeros in the various block rows
1855            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1856            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1857            for the diagonal entry and set a value even if it is zero.
1858 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1859            submatrix (same for all local rows).
1860 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1861            off-diagonal portion of the local submatrix that is right of the diagonal
1862            (possibly different for each block row) or NULL.
1863 
1864 
1865    Options Database Keys:
1866 +   -mat_no_unroll - uses code that does not unroll the loops in the
1867                      block calculations (much slower)
1868 -   -mat_block_size - size of the blocks to use
1869 
1870    Notes:
1871 
1872    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1873    then it must be used on all processors that share the object for that argument.
1874 
1875    If the *_nnz parameter is given then the *_nz parameter is ignored.
1876 
1877    Storage Information:
1878    For a square global matrix we define each processor's diagonal portion
1879    to be its local rows and the corresponding columns (a square submatrix);
1880    each processor's off-diagonal portion encompasses the remainder of the
1881    local matrix (a rectangular submatrix).
1882 
1883    The user can specify preallocated storage for the diagonal part of
1884    the local submatrix with either d_nz or d_nnz (not both).  Set
1885    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1886    memory allocation.  Likewise, specify preallocated storage for the
1887    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1888 
1889    You can call MatGetInfo() to get information on how effective the preallocation was;
1890    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1891    You can also run with the option -info and look for messages with the string
1892    malloc in them to see if additional memory allocation was needed.
1893 
1894    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1895    the figure below we depict these three local rows and all columns (0-11).
1896 
1897 .vb
1898            0 1 2 3 4 5 6 7 8 9 10 11
1899           --------------------------
1900    row 3  |. . . d d d o o o o  o  o
1901    row 4  |. . . d d d o o o o  o  o
1902    row 5  |. . . d d d o o o o  o  o
1903           --------------------------
1904 .ve
1905 
1906    Thus, any entries in the d locations are stored in the d (diagonal)
1907    submatrix, and any entries in the o locations are stored in the
1908    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1909    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1910 
1911    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1912    plus the diagonal part of the d matrix,
1913    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1914 
1915    In general, for PDE problems in which most nonzeros are near the diagonal,
1916    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1917    or you will get TERRIBLE performance; see the users' manual chapter on
1918    matrices.
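
   For example, with block size 2 the call below (a sketch; the constant estimates 3 and 1 are
   placeholders for values obtained from your own sparsity pattern, and per-block-row arrays can
   be passed in place of the NULL arguments)
.vb
  ierr = MatMPISBAIJSetPreallocation(A,2,3,NULL,1,NULL);CHKERRQ(ierr);
.ve
   reserves 3 block nonzeros per block row in the d portion and 1 in the o portion.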
1919 
1920    Level: intermediate
1921 
1922 .keywords: matrix, block, aij, compressed row, sparse, parallel
1923 
1924 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1925 @*/
1926 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1927 {
1928   PetscErrorCode ierr;
1929 
1930   PetscFunctionBegin;
1931   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1932   PetscValidType(B,1);
1933   PetscValidLogicalCollectiveInt(B,bs,2);
1934   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1935   PetscFunctionReturn(0);
1936 }
1937 
1938 #undef __FUNCT__
1939 #define __FUNCT__ "MatCreateSBAIJ"
1940 /*@C
1941    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1942    (block compressed row).  For good matrix assembly performance
1943    the user should preallocate the matrix storage by setting the parameters
1944    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1945    performance can be increased by more than a factor of 50.
1946 
1947    Collective on MPI_Comm
1948 
1949    Input Parameters:
1950 +  comm - MPI communicator
1951 .  bs   - size of block
1952 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1953            This value should be the same as the local size used in creating the
1954            y vector for the matrix-vector product y = Ax.
1955 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1956            This value should be the same as the local size used in creating the
1957            x vector for the matrix-vector product y = Ax.
1958 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1959 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1960 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1961            submatrix  (same for all local rows)
1962 .  d_nnz - array containing the number of block nonzeros in the various block rows
1963            in the upper triangular portion of the diagonal portion of the local submatrix
1964            (possibly different for each block row) or NULL.
1965            If you plan to factor the matrix you must leave room for the diagonal entry and
1966            set its value even if it is zero.
1967 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1968            submatrix (same for all local rows).
1969 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1970            off-diagonal portion of the local submatrix (possibly different for
1971            each block row) or NULL.
1972 
1973    Output Parameter:
1974 .  A - the matrix
1975 
1976    Options Database Keys:
1977 +   -mat_no_unroll - uses code that does not unroll the loops in the
1978                      block calculations (much slower)
1979 .   -mat_block_size - size of the blocks to use
1980 -   -mat_mpi - use the parallel matrix data structures even on one processor
1981                (defaults to using SeqBAIJ format on one processor)
1982 
1983    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1984    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1985    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1986 
1987    Notes:
1988    The number of rows and columns must be divisible by blocksize.
1989    This matrix type does not support complex Hermitian operations.
1990 
1991    The user MUST specify either the local or global matrix dimensions
1992    (possibly both).
1993 
1994    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1995    then it must be used on all processors that share the object for that argument.
1996 
1997    If the *_nnz parameter is given then the *_nz parameter is ignored.
1998 
1999    Storage Information:
2000    For a square global matrix we define each processor's diagonal portion
2001    to be its local rows and the corresponding columns (a square submatrix);
2002    each processor's off-diagonal portion encompasses the remainder of the
2003    local matrix (a rectangular submatrix).
2004 
2005    The user can specify preallocated storage for the diagonal part of
2006    the local submatrix with either d_nz or d_nnz (not both).  Set
2007    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2008    memory allocation.  Likewise, specify preallocated storage for the
2009    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2010 
2011    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2012    the figure below we depict these three local rows and all columns (0-11).
2013 
2014 .vb
2015            0 1 2 3 4 5 6 7 8 9 10 11
2016           --------------------------
2017    row 3  |. . . d d d o o o o  o  o
2018    row 4  |. . . d d d o o o o  o  o
2019    row 5  |. . . d d d o o o o  o  o
2020           --------------------------
2021 .ve
2022 
2023    Thus, any entries in the d locations are stored in the d (diagonal)
2024    submatrix, and any entries in the o locations are stored in the
2025    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2026    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2027 
2028    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2029    plus the diagonal part of the d matrix,
2030    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2031    In general, for PDE problems in which most nonzeros are near the diagonal,
2032    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2033    or you will get TERRIBLE performance; see the users' manual chapter on
2034    matrices.
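
   For example, a call of the following shape (a sketch; the block size 2, the global size M, and
   the per-block-row estimates 3 and 1 are illustrative placeholders) creates such a matrix:
.vb
  ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,NULL,1,NULL,&A);CHKERRQ(ierr);
.ve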
2035 
2036    Level: intermediate
2037 
2038 .keywords: matrix, block, aij, compressed row, sparse, parallel
2039 
2040 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2041 @*/
2042 
2043 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2044 {
2045   PetscErrorCode ierr;
2046   PetscMPIInt    size;
2047 
2048   PetscFunctionBegin;
2049   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2050   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2051   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2052   if (size > 1) {
2053     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2054     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2055   } else {
2056     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2057     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2058   }
2059   PetscFunctionReturn(0);
2060 }
2061 
2062 
2063 #undef __FUNCT__
2064 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2065 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2066 {
2067   Mat            mat;
2068   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2069   PetscErrorCode ierr;
2070   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2071   PetscScalar    *array;
2072 
2073   PetscFunctionBegin;
2074   *newmat = 0;
2075 
2076   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2077   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2078   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2079   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2080   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2081   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2082 
2083   mat->factortype   = matin->factortype;
2084   mat->preallocated = PETSC_TRUE;
2085   mat->assembled    = PETSC_TRUE;
2086   mat->insertmode   = NOT_SET_VALUES;
2087 
2088   a      = (Mat_MPISBAIJ*)mat->data;
2089   a->bs2 = oldmat->bs2;
2090   a->mbs = oldmat->mbs;
2091   a->nbs = oldmat->nbs;
2092   a->Mbs = oldmat->Mbs;
2093   a->Nbs = oldmat->Nbs;
2094 
2095 
2096   a->size         = oldmat->size;
2097   a->rank         = oldmat->rank;
2098   a->donotstash   = oldmat->donotstash;
2099   a->roworiented  = oldmat->roworiented;
2100   a->rowindices   = 0;
2101   a->rowvalues    = 0;
2102   a->getrowactive = PETSC_FALSE;
2103   a->barray       = 0;
2104   a->rstartbs     = oldmat->rstartbs;
2105   a->rendbs       = oldmat->rendbs;
2106   a->cstartbs     = oldmat->cstartbs;
2107   a->cendbs       = oldmat->cendbs;
2108 
2109   /* hash table stuff */
2110   a->ht           = 0;
2111   a->hd           = 0;
2112   a->ht_size      = 0;
2113   a->ht_flag      = oldmat->ht_flag;
2114   a->ht_fact      = oldmat->ht_fact;
2115   a->ht_total_ct  = 0;
2116   a->ht_insert_ct = 0;
2117 
2118   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2119   if (oldmat->colmap) {
2120 #if defined(PETSC_USE_CTABLE)
2121     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2122 #else
2123     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2124     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2125     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2126 #endif
2127   } else a->colmap = 0;
2128 
2129   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2130     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2131     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2132     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2133   } else a->garray = 0;
2134 
2135   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2136   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2137   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2138   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2139   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2140 
2141   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2142   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2143   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2145 
2146   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2147   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2148   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2149   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2150   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2151   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2152   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2153   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2154   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2158   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2159 
2160   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2161   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2162   a->sMvctx = oldmat->sMvctx;
2163   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2164 
2165   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2166   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2167   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2168   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2169   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2170   *newmat = mat;
2171   PetscFunctionReturn(0);
2172 }
2173 
2174 #undef __FUNCT__
2175 #define __FUNCT__ "MatLoad_MPISBAIJ"
2176 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2177 {
2178   PetscErrorCode ierr;
2179   PetscInt       i,nz,j,rstart,rend;
2180   PetscScalar    *vals,*buf;
2181   MPI_Comm       comm;
2182   MPI_Status     status;
2183   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2184   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2185   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2186   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2187   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2188   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2189   int            fd;
2190 
2191   PetscFunctionBegin;
2192   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2193   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2194   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2195   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2196 
2197   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2198   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2199   if (!rank) {
2200     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2201     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2202     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2203     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2204   }
2205 
2206   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2207 
2208   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2209   M    = header[1];
2210   N    = header[2];
2211 
2212   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2213   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2214   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2215 
2216   /* If global sizes are set, check if they are consistent with that given in the file */
2217   if (sizesset) {
2218     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2219   }
2220   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
2221   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2222 
2223   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2224 
2225   /*
2226      This code adds extra rows to make sure the number of rows is
2227      divisible by the blocksize
2228   */
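  /* For example (illustrative only): with M = 10 and bs = 4, Mbs becomes 3 and extra_rows = 2,
     so two padded rows (given unit diagonal entries below) bring the row count to 12, a
     multiple of the block size. */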
2229   Mbs        = M/bs;
2230   extra_rows = bs - M + bs*(Mbs);
2231   if (extra_rows == bs) extra_rows = 0;
2232   else                  Mbs++;
2233   if (extra_rows && !rank) {
2234     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2235   }
2236 
2237   /* determine ownership of all rows */
2238   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2239     mbs = Mbs/size + ((Mbs % size) > rank);
2240     m   = mbs*bs;
2241   } else { /* User Set */
2242     m   = newmat->rmap->n;
2243     mbs = m/bs;
2244   }
2245   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2246   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2247   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2248   rowners[0] = 0;
2249   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2250   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2251   rstart = rowners[rank];
2252   rend   = rowners[rank+1];
2253 
2254   /* distribute row lengths to all processors */
2255   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2256   if (!rank) {
2257     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2258     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2259     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2260     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2261     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2262     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2263     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2264   } else {
2265     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2266   }
2267 
2268   if (!rank) {   /* procs[0] */
2269     /* calculate the number of nonzeros on each processor */
2270     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2271     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2272     for (i=0; i<size; i++) {
2273       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2274         procsnz[i] += rowlengths[j];
2275       }
2276     }
2277     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2278 
2279     /* determine max buffer needed and allocate it */
2280     maxnz = 0;
2281     for (i=0; i<size; i++) {
2282       maxnz = PetscMax(maxnz,procsnz[i]);
2283     }
2284     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2285 
2286     /* read in my part of the matrix column indices  */
2287     nz     = procsnz[0];
2288     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2289     mycols = ibuf;
2290     if (size == 1) nz -= extra_rows;
2291     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2292     if (size == 1) {
2293       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2294     }
2295 
2296     /* read in everyone else's column indices (except the last proc's) and ship them off */
2297     for (i=1; i<size-1; i++) {
2298       nz   = procsnz[i];
2299       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2300       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2301     }
2302     /* read in the column indices for the last proc */
2303     if (size != 1) {
2304       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2305       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2306       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2307       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2308     }
2309     ierr = PetscFree(cols);CHKERRQ(ierr);
2310   } else {  /* procs[i], i>0 */
2311     /* determine buffer space needed for message */
2312     nz = 0;
2313     for (i=0; i<m; i++) nz += locrowlens[i];
2314     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2315     mycols = ibuf;
2316     /* receive message of column indices */
2317     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2318     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2319     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2320   }
2321 
2322   /* loop over local rows, determining number of off diagonal entries */
2323   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2324   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2325   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2326   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2327   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2328   rowcount = 0;
2329   nzcount  = 0;
2330   for (i=0; i<mbs; i++) {
2331     dcount  = 0;
2332     odcount = 0;
2333     for (j=0; j<bs; j++) {
2334       kmax = locrowlens[rowcount];
2335       for (k=0; k<kmax; k++) {
2336         tmp = mycols[nzcount++]/bs; /* block col. index */
2337         if (!mask[tmp]) {
2338           mask[tmp] = 1;
2339           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2340           else masked1[dcount++] = tmp; /* entry in diag portion */
2341         }
2342       }
2343       rowcount++;
2344     }
2345 
2346     dlens[i]  = dcount;  /* d_nzz[i] */
2347     odlens[i] = odcount; /* o_nzz[i] */
2348 
2349     /* zero out the mask elements we set */
2350     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2351     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2352   }
2353   if (!sizesset) {
2354     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2355   }
2356   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2357   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2358 
2359   if (!rank) {
2360     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2361     /* read in my part of the matrix numerical values  */
2362     nz     = procsnz[0];
2363     vals   = buf;
2364     mycols = ibuf;
2365     if (size == 1) nz -= extra_rows;
2366     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2367     if (size == 1) {
2368       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2369     }
2370 
2371     /* insert into matrix */
2372     jj = rstart*bs;
2373     for (i=0; i<m; i++) {
2374       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2375       mycols += locrowlens[i];
2376       vals   += locrowlens[i];
2377       jj++;
2378     }
2379 
2380     /* read in the other processors' values (except the last one's) and ship them out */
2381     for (i=1; i<size-1; i++) {
2382       nz   = procsnz[i];
2383       vals = buf;
2384       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2385       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2386     }
2387     /* the last proc */
2388     if (size != 1) {
2389       nz   = procsnz[i] - extra_rows;
2390       vals = buf;
2391       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2392       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2393       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2394     }
2395     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2396 
2397   } else {
2398     /* receive numeric values */
2399     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2400 
2401     /* receive message of values */
2402     vals   = buf;
2403     mycols = ibuf;
2404     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2405     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2406     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2407 
2408     /* insert into matrix */
2409     jj = rstart*bs;
2410     for (i=0; i<m; i++) {
2411       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2412       mycols += locrowlens[i];
2413       vals   += locrowlens[i];
2414       jj++;
2415     }
2416   }
2417 
2418   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2419   ierr = PetscFree(buf);CHKERRQ(ierr);
2420   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2421   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2422   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2423   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2424   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2425   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2426   PetscFunctionReturn(0);
2427 }
2428 
2429 #undef __FUNCT__
2430 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2431 /*XXXXX@
2432    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2433 
2434    Input Parameters:
2435 .  mat  - the matrix
2436 .  fact - factor
2437 
2438    Not Collective on Mat, each process can have a different hash factor
2439 
2440    Level: advanced
2441 
2442   Notes:
2443    This can also be set by the command line option: -mat_use_hash_table fact
2444 
2445 .keywords: matrix, hashtable, factor, HT
2446 
2447 .seealso: MatSetOption()
2448 @XXXXX*/
2449 
2450 
2451 #undef __FUNCT__
2452 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2453 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2454 {
2455   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2456   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2457   PetscReal      atmp;
2458   PetscReal      *work,*svalues,*rvalues;
2459   PetscErrorCode ierr;
2460   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2461   PetscMPIInt    rank,size;
2462   PetscInt       *rowners_bs,dest,count,source;
2463   PetscScalar    *va;
2464   MatScalar      *ba;
2465   MPI_Status     stat;
2466 
2467   PetscFunctionBegin;
2468   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2469   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2470   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2471 
2472   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2473   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2474 
2475   bs  = A->rmap->bs;
2476   mbs = a->mbs;
2477   Mbs = a->Mbs;
2478   ba  = b->a;
2479   bi  = b->i;
2480   bj  = b->j;
2481 
2482   /* find ownerships */
2483   rowners_bs = A->rmap->range;
2484 
2485   /* each proc creates an array to be distributed */
2486   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2487   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2488 
2489   /* row_max for B */
2490   if (rank != size-1) {
2491     for (i=0; i<mbs; i++) {
2492       ncols = bi[1] - bi[0]; bi++;
2493       brow  = bs*i;
2494       for (j=0; j<ncols; j++) {
2495         bcol = bs*(*bj);
2496         for (kcol=0; kcol<bs; kcol++) {
2497           col  = bcol + kcol;                /* local col index */
2498           col += rowners_bs[rank+1];      /* global col index */
2499           for (krow=0; krow<bs; krow++) {
2500             atmp = PetscAbsScalar(*ba); ba++;
2501             row  = brow + krow;   /* local row index */
2502             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2503             if (work[col] < atmp) work[col] = atmp;
2504           }
2505         }
2506         bj++;
2507       }
2508     }
2509 
2510     /* send values to its owners */
2511     for (dest=rank+1; dest<size; dest++) {
2512       svalues = work + rowners_bs[dest];
2513       count   = rowners_bs[dest+1]-rowners_bs[dest];
2514       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2515     }
2516   }
2517 
2518   /* receive values */
2519   if (rank) {
2520     rvalues = work;
2521     count   = rowners_bs[rank+1]-rowners_bs[rank];
2522     for (source=0; source<rank; source++) {
2523       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2524       /* process values */
2525       for (i=0; i<count; i++) {
2526         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2527       }
2528     }
2529   }
2530 
2531   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2532   ierr = PetscFree(work);CHKERRQ(ierr);
2533   PetscFunctionReturn(0);
2534 }
2535 
2536 #undef __FUNCT__
2537 #define __FUNCT__ "MatSOR_MPISBAIJ"
2538 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2539 {
2540   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2541   PetscErrorCode    ierr;
2542   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2543   PetscScalar       *x,*ptr,*from;
2544   Vec               bb1;
2545   const PetscScalar *b;
2546 
2547   PetscFunctionBegin;
2548   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2549   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2550 
2551   if (flag == SOR_APPLY_UPPER) {
2552     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2553     PetscFunctionReturn(0);
2554   }
2555 
2556   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2557     if (flag & SOR_ZERO_INITIAL_GUESS) {
2558       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2559       its--;
2560     }
2561 
2562     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
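    /* Each pass below forms bb1 = bb - (off-process part of A)*xx: slvec0 = -[xx; B^T*xx] is
       scattered with ADD_VALUES into slvec1 = [bb; 0], then bb1 = B*slvec1b + slvec1a, and a
       local symmetric SOR sweep with right-hand side bb1 is applied. */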
2563     while (its--) {
2564 
2565       /* lower triangular part: slvec0b = - B^T*xx */
2566       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2567 
2568       /* copy xx into slvec0a */
2569       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2570       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2571       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2572       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2573 
2574       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2575 
2576       /* copy bb into slvec1a */
2577       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2578       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2579       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2580       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2581 
2582       /* set slvec1b = 0 */
2583       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2584 
2585       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2586       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2587       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2588       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2589 
2590       /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb minus all off-diagonal-block contributions */
2591       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2592 
2593       /* local diagonal sweep */
2594       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2595     }
2596     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2597   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2598     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2599   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2601   } else if (flag & SOR_EISENSTAT) {
2602     Vec               xx1;
2603     PetscBool         hasop;
2604     const PetscScalar *diag;
2605     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2606     PetscInt          i,n;
2607 
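    /* Eisenstat's trick (sketch): the SSOR-preconditioned product is built from one backward and one
       forward triangular sweep plus a diagonal scaling with scale = (omega-2)/omega, avoiding a
       separate full multiplication by A; the off-diagonal blocks are applied explicitly below. */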
2608     if (!mat->xx1) {
2609       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2610       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2611     }
2612     xx1 = mat->xx1;
2613     bb1 = mat->bb1;
2614 
2615     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2616 
2617     if (!mat->diag) {
2618       /* note: this cached diagonal becomes wrong if the same matrix is later given new numerical values */
2619       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2620       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2621     }
2622     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2623 
2624     if (hasop) {
2625       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2626       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2627     } else {
2628       /*
2629           These two lines are replaced by code that may be a bit faster for a good compiler
2630       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2631       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2632       */
2633       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2634       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2635       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2636       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2637       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2638       if (omega == 1.0) {
2639         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2640         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2641       } else {
2642         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2643         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2644       }
2645       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2646       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2647       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2648       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2649     }
2650 
2651     /* multiply off-diagonal portion of matrix */
2652     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2653     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2654     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2655     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2656     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2657     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2658     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2659     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2660     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2661     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2662 
2663     /* local sweep */
2664     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2665     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2666   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2667   PetscFunctionReturn(0);
2668 }
2669 
2670 #undef __FUNCT__
2671 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2672 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2673 {
2674   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2675   PetscErrorCode ierr;
2676   Vec            lvec1,bb1;
2677 
2678   PetscFunctionBegin;
2679   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2680   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2681 
2682   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2683     if (flag & SOR_ZERO_INITIAL_GUESS) {
2684       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2685       its--;
2686     }
2687 
2688     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2689     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
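    /* Two-scatter variant of the sweep below: the lower-triangular contribution B^T*xx is computed
       locally, negated, and reverse-scattered (ADD_VALUES) into bb1 = bb on the owning processes,
       while the forward scatter brings in x_ghost so that bb1 -= B*x_ghost; the local SSOR on the
       diagonal block A then finishes the iteration. */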
2690     while (its--) {
2691       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2692 
2693       /* lower diagonal part: bb1 = bb - B^T*xx */
2694       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2695       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2696 
2697       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2698       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2699       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2700 
2701       /* upper diagonal part: bb1 = bb1 - B*x */
2702       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2703       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2704 
2705       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2706 
2707       /* diagonal sweep */
2708       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2709     }
2710     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2711     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2712   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2713   PetscFunctionReturn(0);
2714 }
2715 
2716 #undef __FUNCT__
2717 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2718 /*@
2719      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
2720          rows in standard CSR format.
2721 
2722    Collective on MPI_Comm
2723 
2724    Input Parameters:
2725 +  comm - MPI communicator
2726 .  bs - the block size, only a block size of 1 is supported
2727 .  m - number of local rows (Cannot be PETSC_DECIDE)
2728 .  n - number of local columns; this should be the same as the local size used in creating the
2729        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2730        calculated if N is given). For square matrices n is almost always m.
2731 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2732 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2733 .  i - row offsets; i[row] gives the start of each local row in j and a, and i[0] must be 0
2734 .  j - column indices for each local row
2735 -  a - matrix values
2736 
2737    Output Parameter:
2738 .   mat - the matrix
2739 
2740    Level: intermediate
2741 
2742    Notes:
2743        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2744      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2745      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2746 
2747        The i and j indices are 0 based, and the entries of i are offsets into the local j array; see the example below.
2748 
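   Example:
       A minimal single-process sketch with bs = 1 for the 2 by 2 symmetric matrix (4 1; 1 3). The
       variable names and numerical values are illustrative only, and it is assumed (as for other
       SBAIJ routines) that each row lists only its diagonal and upper-triangular entries, sorted
       by column.
.vb
       Mat            A;
       PetscErrorCode ierr;
       PetscInt       ia[] = {0,2,3};
       PetscInt       ja[] = {0,1,1};
       PetscScalar    va[] = {4.0,1.0,3.0};

       ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_SELF,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ia,ja,va,&A);CHKERRQ(ierr);
.ve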
2749 .keywords: matrix, sbaij, compressed row, sparse, parallel
2750 
2751 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2752           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2753 @*/
2754 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2755 {
2756   PetscErrorCode ierr;
2757 
2758 
2759   PetscFunctionBegin;
2760   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2761   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2762   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2763   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2764   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2765   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2766   PetscFunctionReturn(0);
2767 }
2768 
2769 
2770 #undef __FUNCT__
2771 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2772 /*@C
2773    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2774    (symmetric block compressed row storage, which keeps only the upper triangular part).
2775 
2776    Collective on MPI_Comm
2777 
2778    Input Parameters:
2779 +  A - the matrix
2780 .  bs - the block size
2781 .  i - the indices into j for the start of each local row (starts with zero)
2782 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2783 -  v - optional values in the matrix
2784 
2785    Level: developer
2786 
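   Example:
       A minimal sketch that mirrors what MatCreateMPISBAIJWithArrays() does internally; comm, bs, the
       local/global sizes, and the CSR arrays i, j, v are assumed to have been set up as described above.
.vb
       Mat B;

       ierr = MatCreate(comm,&B);CHKERRQ(ierr);
       ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
       ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
       ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
.ve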
2787 .keywords: matrix, sbaij, compressed row, sparse, parallel
2788 
2789 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2790 @*/
2791 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2792 {
2793   PetscErrorCode ierr;
2794 
2795   PetscFunctionBegin;
2796   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2797   PetscFunctionReturn(0);
2798 }
2799 
2800 
2801