xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision fc9b51d31e2be1d1d6cc93760bb648055e237fa2)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
25 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
32   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
38 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
39 {
40   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
41   PetscErrorCode ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
45   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
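/*
   The two macros below insert or add one scalar entry into the local diagonal block matrix A
   (SeqSBAIJ) or the off-diagonal block matrix B (SeqBAIJ): the block column is located with a
   short bisection followed by a linear scan of the block row; an existing block entry is updated
   in place, otherwise the later blocks of the row are shifted up by one (reallocating through
   MatSeqXAIJReallocateAIJ() when the preallocation is exhausted, if nonew permits) and the new
   entry is written.
*/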
49 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
50   { \
51  \
52     brow = row/bs;  \
53     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
54     rmax = aimax[brow]; nrow = ailen[brow]; \
55     bcol = col/bs; \
56     ridx = row % bs; cidx = col % bs; \
57     low  = 0; high = nrow; \
58     while (high-low > 3) { \
59       t = (low+high)/2; \
60       if (rp[t] > bcol) high = t; \
61       else              low  = t; \
62     } \
63     for (_i=low; _i<high; _i++) { \
64       if (rp[_i] > bcol) break; \
65       if (rp[_i] == bcol) { \
66         bap = ap + bs2*_i + bs*cidx + ridx; \
67         if (addv == ADD_VALUES) *bap += value;  \
68         else                    *bap  = value;  \
69         goto a_noinsert; \
70       } \
71     } \
72     if (a->nonew == 1) goto a_noinsert; \
73     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
74     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
75     N = nrow++ - 1;  \
76     /* shift up all the later entries in this row */ \
77     for (ii=N; ii>=_i; ii--) { \
78       rp[ii+1] = rp[ii]; \
79       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
80     } \
81     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
82     rp[_i]                      = bcol;  \
83     ap[bs2*_i + bs*cidx + ridx] = value;  \
84     A->nonzerostate++;\
85 a_noinsert:; \
86     ailen[brow] = nrow; \
87   }
88 
89 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
90   { \
91     brow = row/bs;  \
92     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
93     rmax = bimax[brow]; nrow = bilen[brow]; \
94     bcol = col/bs; \
95     ridx = row % bs; cidx = col % bs; \
96     low  = 0; high = nrow; \
97     while (high-low > 3) { \
98       t = (low+high)/2; \
99       if (rp[t] > bcol) high = t; \
100       else              low  = t; \
101     } \
102     for (_i=low; _i<high; _i++) { \
103       if (rp[_i] > bcol) break; \
104       if (rp[_i] == bcol) { \
105         bap = ap + bs2*_i + bs*cidx + ridx; \
106         if (addv == ADD_VALUES) *bap += value;  \
107         else                    *bap  = value;  \
108         goto b_noinsert; \
109       } \
110     } \
111     if (b->nonew == 1) goto b_noinsert; \
112     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
113     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
114     N = nrow++ - 1;  \
115     /* shift up all the later entries in this row */ \
116     for (ii=N; ii>=_i; ii--) { \
117       rp[ii+1] = rp[ii]; \
118       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
119     } \
120     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
121     rp[_i]                      = bcol;  \
122     ap[bs2*_i + bs*cidx + ridx] = value;  \
123     B->nonzerostate++;\
124 b_noinsert:; \
125     bilen[brow] = nrow; \
126   }
127 
128 /* Only add/insert a(i,j) with i<=j (blocks).
129    Any a(i,j) with i>j input by the user is ignored (or generates an error unless MAT_IGNORE_LOWER_TRIANGULAR is set).
130 */
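/*
   Illustrative caller-side sketch (not part of this routine; i, j, v and mat are placeholders for
   a caller's global indices, value and MATMPISBAIJ matrix): either pass only entries with
   i/bs <= j/bs, or ask the matrix to drop the lower triangle silently instead of erroring:

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);

   With the option set, any value whose block row exceeds its block column is skipped below.
*/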
131 #undef __FUNCT__
132 #define __FUNCT__ "MatSetValues_MPISBAIJ"
133 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
134 {
135   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
136   MatScalar      value;
137   PetscBool      roworiented = baij->roworiented;
138   PetscErrorCode ierr;
139   PetscInt       i,j,row,col;
140   PetscInt       rstart_orig=mat->rmap->rstart;
141   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
142   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
143 
144   /* Some variables required by the macros */
145   Mat          A     = baij->A;
146   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
147   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
148   MatScalar    *aa   =a->a;
149 
150   Mat         B     = baij->B;
151   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
152   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
153   MatScalar   *ba   =b->a;
154 
155   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
156   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
157   MatScalar *ap,*bap;
158 
159   /* for stash */
160   PetscInt  n_loc, *in_loc = NULL;
161   MatScalar *v_loc = NULL;
162 
163   PetscFunctionBegin;
164   if (v) PetscValidScalarPointer(v,6);
165   if (!baij->donotstash) {
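    /* in_loc/v_loc are scratch buffers used further below to strip lower triangular entries from
       off-process rows before they are stashed; they are grown lazily to the largest n seen */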
166     if (n > baij->n_loc) {
167       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
168       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
169       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
170       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
171 
172       baij->n_loc = n;
173     }
174     in_loc = baij->in_loc;
175     v_loc  = baij->v_loc;
176   }
177 
178   for (i=0; i<m; i++) {
179     if (im[i] < 0) continue;
180 #if defined(PETSC_USE_DEBUG)
181     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
182 #endif
183     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
184       row = im[i] - rstart_orig;              /* local row index */
185       for (j=0; j<n; j++) {
186         if (im[i]/bs > in[j]/bs) {
187           if (a->ignore_ltriangular) {
188             continue;    /* ignore lower triangular blocks */
189         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
190         }
191         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
192           col  = in[j] - cstart_orig;         /* local col index */
193           brow = row/bs; bcol = col/bs;
194           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
195           if (roworiented) value = v[i*n+j];
196           else             value = v[i+j*m];
197           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
198           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
199         } else if (in[j] < 0) continue;
200 #if defined(PETSC_USE_DEBUG)
201         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
202 #endif
203         else {  /* off-diag entry (B) */
204           if (mat->was_assembled) {
205             if (!baij->colmap) {
206               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
207             }
208 #if defined(PETSC_USE_CTABLE)
209             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
210             col  = col - 1;
211 #else
212             col = baij->colmap[in[j]/bs] - 1;
213 #endif
214             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
215               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
216               col  =  in[j];
217               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
218               B    = baij->B;
219               b    = (Mat_SeqBAIJ*)(B)->data;
220               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
221               ba   = b->a;
222             } else col += in[j]%bs;
223           } else col = in[j];
224           if (roworiented) value = v[i*n+j];
225           else             value = v[i+j*m];
226           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
227           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
228         }
229       }
230     } else {  /* off processor entry */
231       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
232       if (!baij->donotstash) {
233         mat->assembled = PETSC_FALSE;
234         n_loc          = 0;
235         for (j=0; j<n; j++) {
236           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
237           in_loc[n_loc] = in[j];
238           if (roworiented) {
239             v_loc[n_loc] = v[i*n+j];
240           } else {
241             v_loc[n_loc] = v[j*m+i];
242           }
243           n_loc++;
244         }
245         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
246       }
247     }
248   }
249   PetscFunctionReturn(0);
250 }
251 
252 #undef __FUNCT__
253 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
254 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
255 {
256   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
257   const MatScalar *value;
258   MatScalar       *barray     =baij->barray;
259   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
260   PetscErrorCode  ierr;
261   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
262   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
263   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
264 
265   PetscFunctionBegin;
266   if (!barray) {
267     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
268     baij->barray = barray;
269   }
270 
271   if (roworiented) {
272     stepval = (n-1)*bs;
273   } else {
274     stepval = (m-1)*bs;
275   }
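  /* stepval is the number of scalars to skip in v to move from the end of one row of a bs x bs
     block to the start of its next row (row-oriented input; columns for column-oriented input);
     it is used below to gather each block into the contiguous work array barray */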
276   for (i=0; i<m; i++) {
277     if (im[i] < 0) continue;
278 #if defined(PETSC_USE_DEBUG)
279     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
280 #endif
281     if (im[i] >= rstart && im[i] < rend) {
282       row = im[i] - rstart;
283       for (j=0; j<n; j++) {
284         if (im[i] > in[j]) {
285           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
286           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
287         }
288         /* If NumCol = 1 then a copy is not required */
289         if ((roworiented) && (n == 1)) {
290           barray = (MatScalar*) v + i*bs2;
291         } else if ((!roworiented) && (m == 1)) {
292           barray = (MatScalar*) v + j*bs2;
293         } else { /* Here a copy is required */
294           if (roworiented) {
295             value = v + i*(stepval+bs)*bs + j*bs;
296           } else {
297             value = v + j*(stepval+bs)*bs + i*bs;
298           }
299           for (ii=0; ii<bs; ii++,value+=stepval) {
300             for (jj=0; jj<bs; jj++) {
301               *barray++ = *value++;
302             }
303           }
304           barray -=bs2;
305         }
306 
307         if (in[j] >= cstart && in[j] < cend) {
308           col  = in[j] - cstart;
309           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
310         } else if (in[j] < 0) continue;
311 #if defined(PETSC_USE_DEBUG)
312         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
313 #endif
314         else {
315           if (mat->was_assembled) {
316             if (!baij->colmap) {
317               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
318             }
319 
320 #if defined(PETSC_USE_DEBUG)
321 #if defined(PETSC_USE_CTABLE)
322             { PetscInt data;
323               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
324               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
325             }
326 #else
327             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328 #endif
329 #endif
330 #if defined(PETSC_USE_CTABLE)
331             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
332             col  = (col - 1)/bs;
333 #else
334             col = (baij->colmap[in[j]] - 1)/bs;
335 #endif
336             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
337               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
338               col  = in[j];
339             }
340           } else col = in[j];
341           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
342         }
343       }
344     } else {
345       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
346       if (!baij->donotstash) {
347         if (roworiented) {
348           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
349         } else {
350           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
351         }
352       }
353     }
354   }
355   PetscFunctionReturn(0);
356 }
357 
358 #undef __FUNCT__
359 #define __FUNCT__ "MatGetValues_MPISBAIJ"
360 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
361 {
362   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
363   PetscErrorCode ierr;
364   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
365   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
366 
367   PetscFunctionBegin;
368   for (i=0; i<m; i++) {
369     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
370     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
371     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
372       row = idxm[i] - bsrstart;
373       for (j=0; j<n; j++) {
374         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
375         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
376         if (idxn[j] >= bscstart && idxn[j] < bscend) {
377           col  = idxn[j] - bscstart;
378           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
379         } else {
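          /* the column lies outside the diagonal block: translate the global block column to B's
             local block column through colmap/garray; if the block is not stored locally the
             value is zero */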
380           if (!baij->colmap) {
381             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
382           }
383 #if defined(PETSC_USE_CTABLE)
384           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
385           data--;
386 #else
387           data = baij->colmap[idxn[j]/bs]-1;
388 #endif
389           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
390           else {
391             col  = data + idxn[j]%bs;
392             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
393           }
394         }
395       }
396     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
397   }
398   PetscFunctionReturn(0);
399 }
400 
401 #undef __FUNCT__
402 #define __FUNCT__ "MatNorm_MPISBAIJ"
403 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
404 {
405   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
406   PetscErrorCode ierr;
407   PetscReal      sum[2],*lnorm2;
408 
409   PetscFunctionBegin;
410   if (baij->size == 1) {
411     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
412   } else {
413     if (type == NORM_FROBENIUS) {
414       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
415       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
416       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
417       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
418       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
419       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
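      /* B holds only one member of each symmetric pair of off-diagonal-block entries, so its
         squared norm is counted twice */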
420       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
421       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
422     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
423       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
424       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
425       PetscReal    *rsum,*rsum2,vabs;
426       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
427       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
428       MatScalar    *v;
429 
430       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
431       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
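      /* accumulate the column sums of the full symmetric matrix: each stored off-diagonal entry
         also stands for its transpose, so it contributes to both its column and its row sum */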
432       /* Amat */
433       v = amat->a; jj = amat->j;
434       for (brow=0; brow<mbs; brow++) {
435         grow = bs*(rstart + brow);
436         nz   = amat->i[brow+1] - amat->i[brow];
437         for (bcol=0; bcol<nz; bcol++) {
438           gcol = bs*(rstart + *jj); jj++;
439           for (col=0; col<bs; col++) {
440             for (row=0; row<bs; row++) {
441               vabs            = PetscAbsScalar(*v); v++;
442               rsum[gcol+col] += vabs;
443               /* non-diagonal block */
444               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
445             }
446           }
447         }
448       }
449       /* Bmat */
450       v = bmat->a; jj = bmat->j;
451       for (brow=0; brow<mbs; brow++) {
452         grow = bs*(rstart + brow);
453         nz = bmat->i[brow+1] - bmat->i[brow];
454         for (bcol=0; bcol<nz; bcol++) {
455           gcol = bs*garray[*jj]; jj++;
456           for (col=0; col<bs; col++) {
457             for (row=0; row<bs; row++) {
458               vabs            = PetscAbsScalar(*v); v++;
459               rsum[gcol+col] += vabs;
460               rsum[grow+row] += vabs;
461             }
462           }
463         }
464       }
465       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
466       *norm = 0.0;
467       for (col=0; col<mat->cmap->N; col++) {
468         if (rsum2[col] > *norm) *norm = rsum2[col];
469       }
470       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
471     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
472   }
473   PetscFunctionReturn(0);
474 }
475 
476 #undef __FUNCT__
477 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
478 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
479 {
480   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
481   PetscErrorCode ierr;
482   PetscInt       nstash,reallocs;
483   InsertMode     addv;
484 
485   PetscFunctionBegin;
486   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
487 
488   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
489   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
490   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
491   mat->insertmode = addv; /* in case this processor had no cache */
492 
493   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
494   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
495   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
496   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
497   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
498   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
499   PetscFunctionReturn(0);
500 }
501 
502 #undef __FUNCT__
503 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
504 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
505 {
506   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
507   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
508   PetscErrorCode ierr;
509   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
510   PetscInt       *row,*col;
511   PetscBool      other_disassembled;
512   PetscMPIInt    n;
513   PetscBool      r1,r2,r3;
514   MatScalar      *val;
515   InsertMode     addv = mat->insertmode;
516 
517   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
518   PetscFunctionBegin;
519   if (!baij->donotstash &&  !mat->nooffprocentries) {
520     while (1) {
521       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
522       if (!flg) break;
523 
524       for (i=0; i<n;) {
525         /* Now identify the consecutive vals belonging to the same row */
526         for (j=i,rstart=row[j]; j<n; j++) {
527           if (row[j] != rstart) break;
528         }
529         if (j < n) ncols = j-i;
530         else       ncols = n-i;
531         /* Now assemble all these values with a single function call */
532         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
533         i    = j;
534       }
535     }
536     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
537     /* Now process the block-stash. Since the values are stashed column-oriented,
538        temporarily set the roworiented flags to PETSC_FALSE, and restore the
539        original flags after MatSetValuesBlocked() */
540     r1 = baij->roworiented;
541     r2 = a->roworiented;
542     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
543 
544     baij->roworiented = PETSC_FALSE;
545     a->roworiented    = PETSC_FALSE;
546 
547     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
548     while (1) {
549       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
550       if (!flg) break;
551 
552       for (i=0; i<n;) {
553         /* Now identify the consecutive vals belonging to the same row */
554         for (j=i,rstart=row[j]; j<n; j++) {
555           if (row[j] != rstart) break;
556         }
557         if (j < n) ncols = j-i;
558         else       ncols = n-i;
559         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
560         i    = j;
561       }
562     }
563     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
564 
565     baij->roworiented = r1;
566     a->roworiented    = r2;
567 
568     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
569   }
570 
571   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
572   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
573 
574   /* determine if any processor has disassembled, if so we must
575      also disassemble ourselves, in order that we may reassemble. */
576   /*
577      if the nonzero structure of submatrix B cannot change then we know that
578      no processor disassembled, thus we can skip this step
579   */
580   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
581     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
582     if (mat->was_assembled && !other_disassembled) {
583       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
584     }
585   }
586 
587   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
588     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
589   }
590   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
591   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
592 
593   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
594 
595   baij->rowvalues = 0;
596 
597   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
598   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
599     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
600     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
601   }
602   PetscFunctionReturn(0);
603 }
604 
605 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
606 #include <petscdraw.h>
607 #undef __FUNCT__
608 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
609 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
610 {
611   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
612   PetscErrorCode    ierr;
613   PetscInt          bs   = mat->rmap->bs;
614   PetscMPIInt       size = baij->size,rank = baij->rank;
615   PetscBool         iascii,isdraw;
616   PetscViewer       sviewer;
617   PetscViewerFormat format;
618 
619   PetscFunctionBegin;
620   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
621   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
622   if (iascii) {
623     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
624     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
625       MatInfo info;
626       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
627       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
630       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
632       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
633       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
634       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
635       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
636       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
637       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_INFO) {
640       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
641       PetscFunctionReturn(0);
642     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
643       PetscFunctionReturn(0);
644     }
645   }
646 
647   if (isdraw) {
648     PetscDraw draw;
649     PetscBool isnull;
650     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
651     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
652   }
653 
654   if (size == 1) {
655     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
656     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
657   } else {
658     /* assemble the entire matrix onto first processor. */
659     Mat          A;
660     Mat_SeqSBAIJ *Aloc;
661     Mat_SeqBAIJ  *Bloc;
662     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
663     MatScalar    *a;
664 
665     /* Should this be the same type as mat? */
666     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
667     if (!rank) {
668       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
669     } else {
670       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
671     }
672     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
673     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
674     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
675     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
676 
677     /* copy over the A part */
678     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
679     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
680     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
681 
682     for (i=0; i<mbs; i++) {
683       rvals[0] = bs*(baij->rstartbs + i);
684       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
685       for (j=ai[i]; j<ai[i+1]; j++) {
686         col = (baij->cstartbs+aj[j])*bs;
687         for (k=0; k<bs; k++) {
688           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
689           col++;
690           a += bs;
691         }
692       }
693     }
694     /* copy over the B part */
695     Bloc = (Mat_SeqBAIJ*)baij->B->data;
696     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
697     for (i=0; i<mbs; i++) {
698 
699       rvals[0] = bs*(baij->rstartbs + i);
700       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
701       for (j=ai[i]; j<ai[i+1]; j++) {
702         col = baij->garray[aj[j]]*bs;
703         for (k=0; k<bs; k++) {
704           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
705           col++;
706           a += bs;
707         }
708       }
709     }
710     ierr = PetscFree(rvals);CHKERRQ(ierr);
711     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
712     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
713     /*
714        Every process has to participate in the call that draws the matrix, since the graphics waits are
715        synchronized across all processors that share the PetscDraw object
716     */
717     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
718     if (!rank) {
719       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
720       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
721       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
722       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
723     }
724     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
725     ierr = MatDestroy(&A);CHKERRQ(ierr);
726   }
727   PetscFunctionReturn(0);
728 }
729 
730 #undef __FUNCT__
731 #define __FUNCT__ "MatView_MPISBAIJ"
732 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
733 {
734   PetscErrorCode ierr;
735   PetscBool      iascii,isdraw,issocket,isbinary;
736 
737   PetscFunctionBegin;
738   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
739   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
740   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
741   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
742   if (iascii || isdraw || issocket || isbinary) {
743     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
744   }
745   PetscFunctionReturn(0);
746 }
747 
748 #undef __FUNCT__
749 #define __FUNCT__ "MatDestroy_MPISBAIJ"
750 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
751 {
752   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
753   PetscErrorCode ierr;
754 
755   PetscFunctionBegin;
756 #if defined(PETSC_USE_LOG)
757   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
758 #endif
759   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
760   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
761   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
762   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
763 #if defined(PETSC_USE_CTABLE)
764   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
765 #else
766   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
767 #endif
768   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
770   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
776   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
777   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
778   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
779   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
780   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
781   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
782   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
783 #if defined(PETSC_USE_REAL_MAT_SINGLE)
784   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
785 #endif
786   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
787   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
788   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
789   ierr = PetscFree(mat->data);CHKERRQ(ierr);
790 
791   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
794   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
795   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
796   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
797   PetscFunctionReturn(0);
798 }
799 
800 #undef __FUNCT__
801 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
802 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
803 {
804   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
805   PetscErrorCode ierr;
806   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
807   PetscScalar    *x,*from;
808 
809   PetscFunctionBegin;
810   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
811   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
812 
813   /* diagonal part */
814   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
815   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
816 
817   /* subdiagonal part */
818   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
819 
820   /* copy x into the vec slvec0 */
821   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
822   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
823 
824   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
825   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
826   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
827 
828   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
829   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
830   /* superdiagonal part */
831   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
832   PetscFunctionReturn(0);
833 }
834 
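/*
   MatMult for the symmetric format, in outline: only the upper triangle is stored, so
   y = A_loc*x + B*x_ghost + (pieces of B^T*x, or B^H*x for the Hermitian variant, returned to
   their owning rows). Both transfers are folded into the single scatter set up by
   MatSetUpMultiply_MPISBAIJ(): slvec0 = [x_loc ; B^T*x] is scattered with ADD_VALUES into
   slvec1 = [A_loc*x + received B^T*x pieces ; ghost values of x], after which one MatMultAdd
   with B completes the product. MatMultAdd_MPISBAIJ below follows the same scheme.
*/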
835 #undef __FUNCT__
836 #define __FUNCT__ "MatMult_MPISBAIJ"
837 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
838 {
839   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
840   PetscErrorCode ierr;
841   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
842   PetscScalar    *x,*from;
843 
844   PetscFunctionBegin;
845   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
846   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
847 
848   /* diagonal part */
849   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
850   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
851 
852   /* subdiagonal part */
853   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
854 
855   /* copy x into the vec slvec0 */
856   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
857   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
858 
859   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
860   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
861   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
862 
863   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
864   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
865   /* superdiagonal part */
866   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
867   PetscFunctionReturn(0);
868 }
869 
870 #undef __FUNCT__
871 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
872 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
873 {
874   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
875   PetscErrorCode ierr;
876   PetscInt       nt;
877 
878   PetscFunctionBegin;
879   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
880   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
881 
882   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
883   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
884 
885   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
886   /* do diagonal part */
887   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
888   /* do superdiagonal part */
889   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
890   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
891   /* do subdiagonal part */
892   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
893   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
894   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
895   PetscFunctionReturn(0);
896 }
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
900 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
901 {
902   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
903   PetscErrorCode ierr;
904   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
905   PetscScalar    *x,*from,zero=0.0;
906 
907   PetscFunctionBegin;
908   /*
909   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
910   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
911   */
912   /* diagonal part */
913   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
914   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
915 
916   /* subdiagonal part */
917   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
918 
919   /* copy x into the vec slvec0 */
920   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
921   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
922   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
923   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
924 
925   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
927   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928 
929   /* superdiagonal part */
930   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
936 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
937 {
938   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
939   PetscErrorCode ierr;
940 
941   PetscFunctionBegin;
942   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   /* do diagonal part */
944   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
945   /* do superdiagonal part */
946   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
947   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
948 
949   /* do subdiagonal part */
950   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
951   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock a->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) max = tmp;
1011     }
1012     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
1025   cmap = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1049             else break;
1050           }
1051           imark = i;
1052         }
1053         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1054         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1055       }
1056     } else {
1057       if (idx) *idx = 0;
1058       if (v)   *v   = 0;
1059     }
1060   }
1061   *nz  = nztot;
1062   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1063   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1064   PetscFunctionReturn(0);
1065 }
1066 
1067 #undef __FUNCT__
1068 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1069 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1070 {
1071   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1072 
1073   PetscFunctionBegin;
1074   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1075   baij->getrowactive = PETSC_FALSE;
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1081 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1082 {
1083   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1084   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1085 
1086   PetscFunctionBegin;
1087   aA->getrow_utriangular = PETSC_TRUE;
1088   PetscFunctionReturn(0);
1089 }
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1092 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1093 {
1094   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1095   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1096 
1097   PetscFunctionBegin;
1098   aA->getrow_utriangular = PETSC_FALSE;
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1104 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1105 {
1106   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1107   PetscErrorCode ierr;
1108 
1109   PetscFunctionBegin;
1110   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1111   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1117 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1118 {
1119   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1120   PetscErrorCode ierr;
1121 
1122   PetscFunctionBegin;
1123   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1124   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1130 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1131 {
1132   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1133   PetscErrorCode ierr;
1134 
1135   PetscFunctionBegin;
1136   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1137   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1143 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1144 {
1145   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1146   Mat            A  = a->A,B = a->B;
1147   PetscErrorCode ierr;
1148   PetscReal      isend[5],irecv[5];
1149 
1150   PetscFunctionBegin;
1151   info->block_size = (PetscReal)matin->rmap->bs;
1152 
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154 
1155   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1156   isend[3] = info->memory;  isend[4] = info->mallocs;
1157 
1158   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1159 
1160   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1161   isend[3] += info->memory;  isend[4] += info->mallocs;
1162   if (flag == MAT_LOCAL) {
1163     info->nz_used      = isend[0];
1164     info->nz_allocated = isend[1];
1165     info->nz_unneeded  = isend[2];
1166     info->memory       = isend[3];
1167     info->mallocs      = isend[4];
1168   } else if (flag == MAT_GLOBAL_MAX) {
1169     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1170 
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else if (flag == MAT_GLOBAL_SUM) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1178 
1179     info->nz_used      = irecv[0];
1180     info->nz_allocated = irecv[1];
1181     info->nz_unneeded  = irecv[2];
1182     info->memory       = irecv[3];
1183     info->mallocs      = irecv[4];
1184   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1185   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1186   info->fill_ratio_needed = 0;
1187   info->factor_mallocs    = 0;
1188   PetscFunctionReturn(0);
1189 }
1190 
1191 #undef __FUNCT__
1192 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1193 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1194 {
1195   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1196   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   switch (op) {
1201   case MAT_NEW_NONZERO_LOCATIONS:
1202   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1203   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1204   case MAT_KEEP_NONZERO_PATTERN:
1205   case MAT_NEW_NONZERO_LOCATION_ERR:
1206     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1207     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1208     break;
1209   case MAT_ROW_ORIENTED:
1210     a->roworiented = flg;
1211 
1212     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1213     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1214     break;
1215   case MAT_NEW_DIAGONALS:
1216     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1217     break;
1218   case MAT_IGNORE_OFF_PROC_ENTRIES:
1219     a->donotstash = flg;
1220     break;
1221   case MAT_USE_HASH_TABLE:
1222     a->ht_flag = flg;
1223     break;
1224   case MAT_HERMITIAN:
1225     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1226     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1227 
1228     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1229     break;
1230   case MAT_SPD:
1231     A->spd_set = PETSC_TRUE;
1232     A->spd     = flg;
1233     if (flg) {
1234       A->symmetric                  = PETSC_TRUE;
1235       A->structurally_symmetric     = PETSC_TRUE;
1236       A->symmetric_set              = PETSC_TRUE;
1237       A->structurally_symmetric_set = PETSC_TRUE;
1238     }
1239     break;
1240   case MAT_SYMMETRIC:
1241     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1242     break;
1243   case MAT_STRUCTURALLY_SYMMETRIC:
1244     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1245     break;
1246   case MAT_SYMMETRY_ETERNAL:
1247     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1248     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1249     break;
1250   case MAT_IGNORE_LOWER_TRIANGULAR:
1251     aA->ignore_ltriangular = flg;
1252     break;
1253   case MAT_ERROR_LOWER_TRIANGULAR:
1254     aA->ignore_ltriangular = flg;
1255     break;
1256   case MAT_GETROW_UPPERTRIANGULAR:
1257     aA->getrow_utriangular = flg;
1258     break;
1259   default:
1260     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1261   }
1262   PetscFunctionReturn(0);
1263 }
1264 
1265 #undef __FUNCT__
1266 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1267 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1268 {
1269   PetscErrorCode ierr;
1270 
1271   PetscFunctionBegin;
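  /* an SBAIJ matrix stores a symmetric matrix, so its transpose is obtained by simply duplicating it */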
1272   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1273     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1280 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1281 {
1282   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1283   Mat            a     = baij->A, b=baij->B;
1284   PetscErrorCode ierr;
1285   PetscInt       nv,m,n;
1286   PetscBool      flg;
1287 
1288   PetscFunctionBegin;
1289   if (ll != rr) {
1290     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1291     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
1292   }
1293   if (!ll) PetscFunctionReturn(0);
1294 
1295   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1296   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be the same",m,n);
1297 
1298   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1299   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1300 
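  /* symmetric scaling D*A*D: ll scales the diagonal block on both sides, while the off-diagonal
     block is scaled by ll on the left and by the scattered (ghost) values of rr == ll on the right */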
1301   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1302 
1303   /* left diagonalscale the off-diagonal part */
1304   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1305 
1306   /* scale the diagonal part */
1307   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1308 
1309   /* right diagonalscale the off-diagonal part */
1310   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1311   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1312   PetscFunctionReturn(0);
1313 }
1314 
1315 #undef __FUNCT__
1316 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1317 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1318 {
1319   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1320   PetscErrorCode ierr;
1321 
1322   PetscFunctionBegin;
1323   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1324   PetscFunctionReturn(0);
1325 }
1326 
1327 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1328 
1329 #undef __FUNCT__
1330 #define __FUNCT__ "MatEqual_MPISBAIJ"
1331 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1332 {
1333   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1334   Mat            a,b,c,d;
1335   PetscBool      flg;
1336   PetscErrorCode ierr;
1337 
1338   PetscFunctionBegin;
1339   a = matA->A; b = matA->B;
1340   c = matB->A; d = matB->B;
1341 
1342   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1343   if (flg) {
1344     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1345   }
1346   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatCopy_MPISBAIJ"
1352 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1353 {
1354   PetscErrorCode ierr;
1355   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1356   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1357 
1358   PetscFunctionBegin;
1359   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1360   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1361     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1362     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1363     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1364   } else {
1365     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1366     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1367   }
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 #undef __FUNCT__
1372 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1373 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1374 {
1375   PetscErrorCode ierr;
1376 
1377   PetscFunctionBegin;
1378   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1384 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1385 {
1386   PetscErrorCode ierr;
1387   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1388   PetscBLASInt   bnz,one=1;
1389   Mat_SeqSBAIJ   *xa,*ya;
1390   Mat_SeqBAIJ    *xb,*yb;
1391 
1392   PetscFunctionBegin;
1393   if (str == SAME_NONZERO_PATTERN) {
1394     PetscScalar alpha = a;
1395     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1396     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1397     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1398     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1399     xb   = (Mat_SeqBAIJ*)xx->B->data;
1400     yb   = (Mat_SeqBAIJ*)yy->B->data;
1401     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1402     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1403   } else {
1404     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1405     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1406     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1414 {
1415   PetscErrorCode ierr;
1416   PetscInt       i;
1417   PetscBool      flg;
1418 
1419   PetscFunctionBegin;
1420   for (i=0; i<n; i++) {
1421     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1422     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1423   }
1424   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 
1429 /* -------------------------------------------------------------------*/
1430 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1431                                        MatGetRow_MPISBAIJ,
1432                                        MatRestoreRow_MPISBAIJ,
1433                                        MatMult_MPISBAIJ,
1434                                /*  4*/ MatMultAdd_MPISBAIJ,
1435                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1436                                        MatMultAdd_MPISBAIJ,
1437                                        0,
1438                                        0,
1439                                        0,
1440                                /* 10*/ 0,
1441                                        0,
1442                                        0,
1443                                        MatSOR_MPISBAIJ,
1444                                        MatTranspose_MPISBAIJ,
1445                                /* 15*/ MatGetInfo_MPISBAIJ,
1446                                        MatEqual_MPISBAIJ,
1447                                        MatGetDiagonal_MPISBAIJ,
1448                                        MatDiagonalScale_MPISBAIJ,
1449                                        MatNorm_MPISBAIJ,
1450                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1451                                        MatAssemblyEnd_MPISBAIJ,
1452                                        MatSetOption_MPISBAIJ,
1453                                        MatZeroEntries_MPISBAIJ,
1454                                /* 24*/ 0,
1455                                        0,
1456                                        0,
1457                                        0,
1458                                        0,
1459                                /* 29*/ MatSetUp_MPISBAIJ,
1460                                        0,
1461                                        0,
1462                                        0,
1463                                        0,
1464                                /* 34*/ MatDuplicate_MPISBAIJ,
1465                                        0,
1466                                        0,
1467                                        0,
1468                                        0,
1469                                /* 39*/ MatAXPY_MPISBAIJ,
1470                                        MatGetSubMatrices_MPISBAIJ,
1471                                        MatIncreaseOverlap_MPISBAIJ,
1472                                        MatGetValues_MPISBAIJ,
1473                                        MatCopy_MPISBAIJ,
1474                                /* 44*/ 0,
1475                                        MatScale_MPISBAIJ,
1476                                        0,
1477                                        0,
1478                                        0,
1479                                /* 49*/ 0,
1480                                        0,
1481                                        0,
1482                                        0,
1483                                        0,
1484                                /* 54*/ 0,
1485                                        0,
1486                                        MatSetUnfactored_MPISBAIJ,
1487                                        0,
1488                                        MatSetValuesBlocked_MPISBAIJ,
1489                                /* 59*/ 0,
1490                                        0,
1491                                        0,
1492                                        0,
1493                                        0,
1494                                /* 64*/ 0,
1495                                        0,
1496                                        0,
1497                                        0,
1498                                        0,
1499                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1500                                        0,
1501                                        0,
1502                                        0,
1503                                        0,
1504                                /* 74*/ 0,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                        0,
1509                                /* 79*/ 0,
1510                                        0,
1511                                        0,
1512                                        0,
1513                                        MatLoad_MPISBAIJ,
1514                                /* 84*/ 0,
1515                                        0,
1516                                        0,
1517                                        0,
1518                                        0,
1519                                /* 89*/ 0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                        0,
1524                                /* 94*/ 0,
1525                                        0,
1526                                        0,
1527                                        0,
1528                                        0,
1529                                /* 99*/ 0,
1530                                        0,
1531                                        0,
1532                                        0,
1533                                        0,
1534                                /*104*/ 0,
1535                                        MatRealPart_MPISBAIJ,
1536                                        MatImaginaryPart_MPISBAIJ,
1537                                        MatGetRowUpperTriangular_MPISBAIJ,
1538                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1539                                /*109*/ 0,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                        0,
1544                                /*114*/ 0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                        0,
1549                                /*119*/ 0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                        0,
1554                                /*124*/ 0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                        0,
1559                                /*129*/ 0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                        0,
1564                                /*134*/ 0,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                        0,
1569                                /*139*/ 0,
1570                                        0,
1571                                        0
1572 };
1573 
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1577 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1578 {
1579   PetscFunctionBegin;
1580   *a = ((Mat_MPISBAIJ*)A->data)->A;
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1586 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1587 {
1588   Mat_MPISBAIJ   *b;
1589   PetscErrorCode ierr;
1590   PetscInt       i,mbs,Mbs;
1591 
1592   PetscFunctionBegin;
1593   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1594   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1595   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1596   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1597 
1598   b   = (Mat_MPISBAIJ*)B->data;
1599   mbs = B->rmap->n/bs;
1600   Mbs = B->rmap->N/bs;
1601   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1602 
1603   B->rmap->bs = bs;
1604   b->bs2      = bs*bs;
1605   b->mbs      = mbs;
1606   b->nbs      = mbs;
1607   b->Mbs      = Mbs;
1608   b->Nbs      = Mbs;
1609 
1610   for (i=0; i<=b->size; i++) {
1611     b->rangebs[i] = B->rmap->range[i]/bs;
1612   }
1613   b->rstartbs = B->rmap->rstart/bs;
1614   b->rendbs   = B->rmap->rend/bs;
1615 
1616   b->cstartbs = B->cmap->rstart/bs;
1617   b->cendbs   = B->cmap->rend/bs;
1618 
1619   if (!B->preallocated) {
1620     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1621     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1622     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1623     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1624     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1625     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1626     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1627     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1628     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1629   }
1630 
1631   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1632   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1633 
1634   B->preallocated = PETSC_TRUE;
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1640 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1641 {
1642   PetscInt       m,rstart,cstart,cend;
1643   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1644   const PetscInt *JJ    =0;
1645   PetscScalar    *values=0;
1646   PetscErrorCode ierr;
1647 
1648   PetscFunctionBegin;
1649   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1650   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1651   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1652   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1653   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1654   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1655   m      = B->rmap->n/bs;
1656   rstart = B->rmap->rstart/bs;
1657   cstart = B->cmap->rstart/bs;
1658   cend   = B->cmap->rend/bs;
1659 
1660   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1661   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1662   for (i=0; i<m; i++) {
1663     nz = ii[i+1] - ii[i];
1664     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1665     nz_max = PetscMax(nz_max,nz);
1666     JJ     = jj + ii[i];
1667     for (j=0; j<nz; j++) {
1668       if (*JJ >= cstart) break;
1669       JJ++;
1670     }
1671     d = 0;
1672     for (; j<nz; j++) {
1673       if (*JJ++ >= cend) break;
1674       d++;
1675     }
1676     d_nnz[i] = d;
1677     o_nnz[i] = nz - d;
1678   }
1679   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1680   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1681 
1682   values = (PetscScalar*)V;
1683   if (!values) {
1684     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1685     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1686   }
1687   for (i=0; i<m; i++) {
1688     PetscInt          row    = i + rstart;
1689     PetscInt          ncols  = ii[i+1] - ii[i];
1690     const PetscInt    *icols = jj + ii[i];
1691     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1692     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1693   }
1694 
1695   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1696   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #if defined(PETSC_HAVE_MUMPS)
1703 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1704 #endif
1705 #if defined(PETSC_HAVE_PASTIX)
1706 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1707 #endif
1708 
1709 /*MC
1710    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1711    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1712    the matrix is stored.
1713 
1714  For complex numbers, by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1715  can call MatSetOption(mat, MAT_HERMITIAN, PETSC_TRUE);
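
  A minimal sketch of enabling the Hermitian multiply (the handle mat is illustrative; the option can only be set once the matrix has been assembled):
.vb
   MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
   MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
   MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE);
.ve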
1716 
1717    Options Database Keys:
1718 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1719 
1720   Level: beginner
1721 
1722 .seealso: MatCreateMPISBAIJ
1723 M*/
1724 
1725 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatCreate_MPISBAIJ"
1729 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1730 {
1731   Mat_MPISBAIJ   *b;
1732   PetscErrorCode ierr;
1733   PetscBool      flg;
1734 
1735   PetscFunctionBegin;
1736   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1737   B->data = (void*)b;
1738   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1739 
1740   B->ops->destroy = MatDestroy_MPISBAIJ;
1741   B->ops->view    = MatView_MPISBAIJ;
1742   B->assembled    = PETSC_FALSE;
1743   B->insertmode   = NOT_SET_VALUES;
1744 
1745   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1746   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1747 
1748   /* build local table of row and column ownerships */
1749   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1750 
1751   /* build cache for off-processor entries formed during assembly */
1752   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1753 
1754   b->donotstash  = PETSC_FALSE;
1755   b->colmap      = NULL;
1756   b->garray      = NULL;
1757   b->roworiented = PETSC_TRUE;
1758 
1759   /* stuff used in block assembly */
1760   b->barray = 0;
1761 
1762   /* stuff used for matrix vector multiply */
1763   b->lvec    = 0;
1764   b->Mvctx   = 0;
1765   b->slvec0  = 0;
1766   b->slvec0b = 0;
1767   b->slvec1  = 0;
1768   b->slvec1a = 0;
1769   b->slvec1b = 0;
1770   b->sMvctx  = 0;
1771 
1772   /* stuff for MatGetRow() */
1773   b->rowindices   = 0;
1774   b->rowvalues    = 0;
1775   b->getrowactive = PETSC_FALSE;
1776 
1777   /* hash table stuff */
1778   b->ht           = 0;
1779   b->hd           = 0;
1780   b->ht_size      = 0;
1781   b->ht_flag      = PETSC_FALSE;
1782   b->ht_fact      = 0;
1783   b->ht_total_ct  = 0;
1784   b->ht_insert_ct = 0;
1785 
1786   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1787   b->ijonly = PETSC_FALSE;
1788 
1789   b->in_loc = 0;
1790   b->v_loc  = 0;
1791   b->n_loc  = 0;
1792   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1793   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1794   if (flg) {
1795     PetscReal fact = 1.39;
1796     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1798     if (fact <= 1.0) fact = 1.39;
1799     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1800     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1801   }
1802   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1803 
1804 #if defined(PETSC_HAVE_PASTIX)
1805   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1806 #endif
1807 #if defined(PETSC_HAVE_MUMPS)
1808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1809 #endif
1810   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1816 
1817   B->symmetric                  = PETSC_TRUE;
1818   B->structurally_symmetric     = PETSC_TRUE;
1819   B->symmetric_set              = PETSC_TRUE;
1820   B->structurally_symmetric_set = PETSC_TRUE;
1821 
1822   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 /*MC
1827    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1828 
1829    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1830    and MATMPISBAIJ otherwise.
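
   As a brief sketch (the handle A, communicator comm, and sizes are illustrative):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,M,M);
   MatSetType(A,MATSBAIJ);   /* resolves to MATSEQSBAIJ or MATMPISBAIJ depending on the communicator size */
   MatSetFromOptions(A);
.ve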
1831 
1832    Options Database Keys:
1833 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1834 
1835   Level: beginner
1836 
1837 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1838 M*/
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1842 /*@C
1843    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1844    the user should preallocate the matrix storage by setting the parameters
1845    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1846    performance can be increased by more than a factor of 50.
1847 
1848    Collective on Mat
1849 
1850    Input Parameters:
1851 +  A - the matrix
1852 .  bs   - size of block
1853 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1854            submatrix  (same for all local rows)
1855 .  d_nnz - array containing the number of block nonzeros in the various block rows
1856            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1857            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1858            for the diagonal entry and set a value even if it is zero.
1859 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1860            submatrix (same for all local rows).
1861 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1862            off-diagonal portion of the local submatrix that is to the right of the diagonal
1863            (possibly different for each block row) or NULL.
1864 
1865 
1866    Options Database Keys:
1867 .   -mat_no_unroll - uses code that does not unroll the loops in the
1868                      block calculations (much slower)
1869 .   -mat_block_size - size of the blocks to use
1870 
1871    Notes:
1872 
1873    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1874    then it must be used on all processors that share the object for that argument.
1875 
1876    If the *_nnz parameter is given then the *_nz parameter is ignored
1877 
1878    Storage Information:
1879    For a square global matrix we define each processor's diagonal portion
1880    to be its local rows and the corresponding columns (a square submatrix);
1881    each processor's off-diagonal portion encompasses the remainder of the
1882    local matrix (a rectangular submatrix).
1883 
1884    The user can specify preallocated storage for the diagonal part of
1885    the local submatrix with either d_nz or d_nnz (not both).  Set
1886    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1887    memory allocation.  Likewise, specify preallocated storage for the
1888    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1889 
1890    You can call MatGetInfo() to get information on how effective the preallocation was;
1891    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1892    You can also run with the option -info and look for messages with the string
1893    malloc in them to see if additional memory allocation was needed.
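
   For instance, a brief sketch of querying those fields (the matrix handle A is illustrative):
.vb
   MatInfo info;
   MatGetInfo(A,MAT_LOCAL,&info);
   PetscPrintf(PETSC_COMM_SELF,"mallocs %g, nz_allocated %g, nz_used %g, nz_unneeded %g\n",
               info.mallocs,info.nz_allocated,info.nz_used,info.nz_unneeded);
.ve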
1894 
1895    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1896    the figure below we depict these three local rows and all columns (0-11).
1897 
1898 .vb
1899            0 1 2 3 4 5 6 7 8 9 10 11
1900           --------------------------
1901    row 3  |. . . d d d o o o o  o  o
1902    row 4  |. . . d d d o o o o  o  o
1903    row 5  |. . . d d d o o o o  o  o
1904           --------------------------
1905 .ve
1906 
1907    Thus, any entries in the d locations are stored in the d (diagonal)
1908    submatrix, and any entries in the o locations are stored in the
1909    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1910    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1911 
1912    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1913    plus the diagonal part of the d matrix,
1914    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1915 
1916    In general, for PDE problems in which most nonzeros are near the diagonal,
1917    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1918    or you will get TERRIBLE performance; see the users' manual chapter on
1919    matrices.
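
   As a worked sketch for the block row depicted above (the values are illustrative): with bs=3 the three
   local rows form a single block row whose diagonal portion holds one stored block (the diagonal block
   itself) and whose off-diagonal portion holds two blocks, so one could call
.vb
   MatMPISBAIJSetPreallocation(A,3,1,NULL,2,NULL);
.ve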
1920 
1921    Level: intermediate
1922 
1923 .keywords: matrix, block, aij, compressed row, sparse, parallel
1924 
1925 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1926 @*/
1927 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1928 {
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1933   PetscValidType(B,1);
1934   PetscValidLogicalCollectiveInt(B,bs,2);
1935   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatCreateSBAIJ"
1941 /*@C
1942    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1943    (block compressed row).  For good matrix assembly performance
1944    the user should preallocate the matrix storage by setting the parameters
1945    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1946    performance can be increased by more than a factor of 50.
1947 
1948    Collective on MPI_Comm
1949 
1950    Input Parameters:
1951 +  comm - MPI communicator
1952 .  bs   - size of block
1953 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1954            This value should be the same as the local size used in creating the
1955            y vector for the matrix-vector product y = Ax.
1956 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1957            This value should be the same as the local size used in creating the
1958            x vector for the matrix-vector product y = Ax.
1959 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1960 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1961 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1962            submatrix  (same for all local rows)
1963 .  d_nnz - array containing the number of block nonzeros in the various block rows
1964            in the upper triangular portion of the diagonal portion of the local submatrix
1965            (possibly different for each block row) or NULL.
1966            If you plan to factor the matrix you must leave room for the diagonal entry and
1967            set its value even if it is zero.
1968 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1969            submatrix (same for all local rows).
1970 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1971            off-diagonal portion of the local submatrix (possibly different for
1972            each block row) or NULL.
1973 
1974    Output Parameter:
1975 .  A - the matrix
1976 
1977    Options Database Keys:
1978 .   -mat_no_unroll - uses code that does not unroll the loops in the
1979                      block calculations (much slower)
1980 .   -mat_block_size - size of the blocks to use
1981 .   -mat_mpi - use the parallel matrix data structures even on one processor
1982                (defaults to using SeqSBAIJ format on one processor)
1983 
1984    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1985    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1986    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1987 
1988    Notes:
1989    The number of rows and columns must be divisible by blocksize.
1990    This matrix type does not support complex Hermitian operations.
1991 
1992    The user MUST specify either the local or global matrix dimensions
1993    (possibly both).
1994 
1995    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor,
1996    then it must be used on all processors that share the object for that argument.
1997 
1998    If the *_nnz parameter is given then the *_nz parameter is ignored
1999 
2000    Storage Information:
2001    For a square global matrix we define each processor's diagonal portion
2002    to be its local rows and the corresponding columns (a square submatrix);
2003    each processor's off-diagonal portion encompasses the remainder of the
2004    local matrix (a rectangular submatrix).
2005 
2006    The user can specify preallocated storage for the diagonal part of
2007    the local submatrix with either d_nz or d_nnz (not both).  Set
2008    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2009    memory allocation.  Likewise, specify preallocated storage for the
2010    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2011 
2012    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2013    the figure below we depict these three local rows and all columns (0-11).
2014 
2015 .vb
2016            0 1 2 3 4 5 6 7 8 9 10 11
2017           --------------------------
2018    row 3  |. . . d d d o o o o  o  o
2019    row 4  |. . . d d d o o o o  o  o
2020    row 5  |. . . d d d o o o o  o  o
2021           --------------------------
2022 .ve
2023 
2024    Thus, any entries in the d locations are stored in the d (diagonal)
2025    submatrix, and any entries in the o locations are stored in the
2026    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2027    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2028 
2029    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2030    plus the diagonal part of the d matrix,
2031    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2032    In general, for PDE problems in which most nonzeros are near the diagonal,
2033    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2034    or you will get TERRIBLE performance; see the users' manual chapter on
2035    matrices.
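
   A brief usage sketch (the communicator comm, block size, sizes, and preallocation values are illustrative):
.vb
   Mat A;
   MatCreateSBAIJ(comm,3,m,m,M,M,1,NULL,2,NULL,&A);
.ve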
2036 
2037    Level: intermediate
2038 
2039 .keywords: matrix, block, aij, compressed row, sparse, parallel
2040 
2041 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2042 @*/
2043 
2044 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2045 {
2046   PetscErrorCode ierr;
2047   PetscMPIInt    size;
2048 
2049   PetscFunctionBegin;
2050   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2051   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2053   if (size > 1) {
2054     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2055     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2056   } else {
2057     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2058     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2059   }
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2066 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2067 {
2068   Mat            mat;
2069   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2070   PetscErrorCode ierr;
2071   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2072   PetscScalar    *array;
2073 
2074   PetscFunctionBegin;
2075   *newmat = 0;
2076 
2077   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2078   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2079   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2080   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2081   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2082   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2083 
2084   mat->factortype   = matin->factortype;
2085   mat->preallocated = PETSC_TRUE;
2086   mat->assembled    = PETSC_TRUE;
2087   mat->insertmode   = NOT_SET_VALUES;
2088 
2089   a      = (Mat_MPISBAIJ*)mat->data;
2090   a->bs2 = oldmat->bs2;
2091   a->mbs = oldmat->mbs;
2092   a->nbs = oldmat->nbs;
2093   a->Mbs = oldmat->Mbs;
2094   a->Nbs = oldmat->Nbs;
2095 
2096 
2097   a->size         = oldmat->size;
2098   a->rank         = oldmat->rank;
2099   a->donotstash   = oldmat->donotstash;
2100   a->roworiented  = oldmat->roworiented;
2101   a->rowindices   = 0;
2102   a->rowvalues    = 0;
2103   a->getrowactive = PETSC_FALSE;
2104   a->barray       = 0;
2105   a->rstartbs     = oldmat->rstartbs;
2106   a->rendbs       = oldmat->rendbs;
2107   a->cstartbs     = oldmat->cstartbs;
2108   a->cendbs       = oldmat->cendbs;
2109 
2110   /* hash table stuff */
2111   a->ht           = 0;
2112   a->hd           = 0;
2113   a->ht_size      = 0;
2114   a->ht_flag      = oldmat->ht_flag;
2115   a->ht_fact      = oldmat->ht_fact;
2116   a->ht_total_ct  = 0;
2117   a->ht_insert_ct = 0;
2118 
2119   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2120   if (oldmat->colmap) {
2121 #if defined(PETSC_USE_CTABLE)
2122     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2123 #else
2124     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2125     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2126     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2127 #endif
2128   } else a->colmap = 0;
2129 
2130   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2131     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2132     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2133     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2134   } else a->garray = 0;
2135 
2136   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2137   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2139   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2140   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2141 
2142   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2144   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2146 
2147   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2148   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2149   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2150   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2151   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2152   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2153   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2154   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2158   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2159   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2160 
2161   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2162   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2163   a->sMvctx = oldmat->sMvctx;
2164   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2165 
2166   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2167   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2168   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2169   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2170   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2171   *newmat = mat;
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatLoad_MPISBAIJ"
2177 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2178 {
2179   PetscErrorCode ierr;
2180   PetscInt       i,nz,j,rstart,rend;
2181   PetscScalar    *vals,*buf;
2182   MPI_Comm       comm;
2183   MPI_Status     status;
2184   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2185   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2186   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2187   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2188   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2189   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2190   int            fd;
2191 
2192   PetscFunctionBegin;
2193   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2194   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2195   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2196   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2197 
2198   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2199   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2200   if (!rank) {
2201     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2202     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2203     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2204     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2205   }
2206 
2207   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2208 
2209   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2210   M    = header[1];
2211   N    = header[2];
2212 
2213   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2214   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2215   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2216 
2217   /* If global sizes are set, check if they are consistent with that given in the file */
2218   if (sizesset) {
2219     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2220   }
2221   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2222   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2223 
2224   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2225 
2226   /*
2227      This code adds extra rows to make sure the number of rows is
2228      divisible by the blocksize
2229   */
2230   Mbs        = M/bs;
2231   extra_rows = bs - M + bs*(Mbs);
2232   if (extra_rows == bs) extra_rows = 0;
2233   else                  Mbs++;
2234   if (extra_rows && !rank) {
2235     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2236   }
2237 
2238   /* determine ownership of all rows */
2239   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2240     mbs = Mbs/size + ((Mbs % size) > rank);
2241     m   = mbs*bs;
2242   } else { /* User Set */
2243     m   = newmat->rmap->n;
2244     mbs = m/bs;
2245   }
2246   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2247   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2248   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2249   rowners[0] = 0;
2250   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2251   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2252   rstart = rowners[rank];
2253   rend   = rowners[rank+1];
2254 
2255   /* distribute row lengths to all processors */
2256   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2257   if (!rank) {
2258     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2259     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2260     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2261     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2262     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2263     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2264     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2265   } else {
2266     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2267   }
2268 
2269   if (!rank) {   /* procs[0] */
2270     /* calculate the number of nonzeros on each processor */
2271     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2272     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2273     for (i=0; i<size; i++) {
2274       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2275         procsnz[i] += rowlengths[j];
2276       }
2277     }
2278     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2279 
2280     /* determine max buffer needed and allocate it */
2281     maxnz = 0;
2282     for (i=0; i<size; i++) {
2283       maxnz = PetscMax(maxnz,procsnz[i]);
2284     }
2285     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2286 
2287     /* read in my part of the matrix column indices  */
2288     nz     = procsnz[0];
2289     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2290     mycols = ibuf;
2291     if (size == 1) nz -= extra_rows;
2292     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2293     if (size == 1) {
2294       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2295     }
2296 
2297     /* read in the column indices for the other processors (except the last) and ship them off */
2298     for (i=1; i<size-1; i++) {
2299       nz   = procsnz[i];
2300       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2301       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2302     }
2303     /* read in the stuff for the last proc */
2304     if (size != 1) {
2305       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2306       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2307       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2308       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2309     }
2310     ierr = PetscFree(cols);CHKERRQ(ierr);
2311   } else {  /* procs[i], i>0 */
2312     /* determine buffer space needed for message */
2313     nz = 0;
2314     for (i=0; i<m; i++) nz += locrowlens[i];
2315     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2316     mycols = ibuf;
2317     /* receive message of column indices*/
2318     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2319     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2320     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2321   }
2322 
2323   /* loop over local rows, determining number of off diagonal entries */
2324   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2325   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2326   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2327   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2328   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2329   rowcount = 0;
2330   nzcount  = 0;
2331   for (i=0; i<mbs; i++) {
2332     dcount  = 0;
2333     odcount = 0;
2334     for (j=0; j<bs; j++) {
2335       kmax = locrowlens[rowcount];
2336       for (k=0; k<kmax; k++) {
2337         tmp = mycols[nzcount++]/bs; /* block col. index */
2338         if (!mask[tmp]) {
2339           mask[tmp] = 1;
2340           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2341           else masked1[dcount++] = tmp; /* entry in diag portion */
2342         }
2343       }
2344       rowcount++;
2345     }
2346 
2347     dlens[i]  = dcount;  /* d_nzz[i] */
2348     odlens[i] = odcount; /* o_nzz[i] */
2349 
2350     /* zero out the mask elements we set */
2351     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2352     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2353   }
2354   if (!sizesset) {
2355     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2356   }
2357   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2358   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2359 
2360   if (!rank) {
2361     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2362     /* read in my part of the matrix numerical values  */
2363     nz     = procsnz[0];
2364     vals   = buf;
2365     mycols = ibuf;
2366     if (size == 1) nz -= extra_rows;
2367     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2368     if (size == 1) {
2369       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2370     }
2371 
2372     /* insert into matrix */
2373     jj = rstart*bs;
2374     for (i=0; i<m; i++) {
2375       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2376       mycols += locrowlens[i];
2377       vals   += locrowlens[i];
2378       jj++;
2379     }
2380 
2381     /* read in the values for the other processors (except the last one) and ship them out */
2382     for (i=1; i<size-1; i++) {
2383       nz   = procsnz[i];
2384       vals = buf;
2385       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2386       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2387     }
2388     /* the last proc */
2389     if (size != 1) {
2390       nz   = procsnz[size-1] - extra_rows;
2391       vals = buf;
2392       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2393       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2394       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2395     }
2396     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2397 
2398   } else {
2399     /* receive numeric values */
2400     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2401 
2402     /* receive message of values*/
2403     vals   = buf;
2404     mycols = ibuf;
2405     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2406     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2407     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2408 
2409     /* insert into matrix */
2410     jj = rstart*bs;
2411     for (i=0; i<m; i++) {
2412       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2413       mycols += locrowlens[i];
2414       vals   += locrowlens[i];
2415       jj++;
2416     }
2417   }
2418 
2419   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2420   ierr = PetscFree(buf);CHKERRQ(ierr);
2421   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2422   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2423   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2424   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2425   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2426   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2427   PetscFunctionReturn(0);
2428 }
2429 
2430 #undef __FUNCT__
2431 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2432 /*XXXXX@
2433    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2434 
2435    Input Parameters:
2436 .  mat  - the matrix
2437 .  fact - factor
2438 
2439    Not Collective on Mat, each process can have a different hash factor
2440 
2441    Level: advanced
2442 
2443   Notes:
2444    This can also be set by the command line option: -mat_use_hash_table fact
2445 
2446 .keywords: matrix, hashtable, factor, HT
2447 
2448 .seealso: MatSetOption()
2449 @XXXXX*/
2450 
2451 
2452 #undef __FUNCT__
2453 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2454 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2455 {
2456   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2457   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2458   PetscReal      atmp;
2459   PetscReal      *work,*svalues,*rvalues;
2460   PetscErrorCode ierr;
2461   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2462   PetscMPIInt    rank,size;
2463   PetscInt       *rowners_bs,dest,count,source;
2464   PetscScalar    *va;
2465   MatScalar      *ba;
2466   MPI_Status     stat;
2467 
2468   PetscFunctionBegin;
2469   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2470   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2471   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2472 
2473   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2474   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2475 
2476   bs  = A->rmap->bs;
2477   mbs = a->mbs;
2478   Mbs = a->Mbs;
2479   ba  = b->a;
2480   bi  = b->i;
2481   bj  = b->j;
2482 
2483   /* find ownerships */
2484   rowners_bs = A->rmap->range;
2485 
2486   /* each proc creates an array to be distributed */
2487   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2488   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2489 
2490   /* row_max for B */
2491   if (rank != size-1) {
2492     for (i=0; i<mbs; i++) {
2493       ncols = bi[1] - bi[0]; bi++;
2494       brow  = bs*i;
2495       for (j=0; j<ncols; j++) {
2496         bcol = bs*(*bj);
2497         for (kcol=0; kcol<bs; kcol++) {
2498           col  = bcol + kcol;                /* local col index */
2499           col += rowners_bs[rank+1];      /* global col index */
2500           for (krow=0; krow<bs; krow++) {
2501             atmp = PetscAbsScalar(*ba); ba++;
2502             row  = brow + krow;   /* local row index */
2503             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2504             if (work[col] < atmp) work[col] = atmp;
2505           }
2506         }
2507         bj++;
2508       }
2509     }
2510 
2511     /* send values to its owners */
2512     for (dest=rank+1; dest<size; dest++) {
2513       svalues = work + rowners_bs[dest];
2514       count   = rowners_bs[dest+1]-rowners_bs[dest];
2515       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2516     }
2517   }
2518 
2519   /* receive values */
2520   if (rank) {
2521     rvalues = work;
2522     count   = rowners_bs[rank+1]-rowners_bs[rank];
2523     for (source=0; source<rank; source++) {
2524       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2525       /* process values */
2526       for (i=0; i<count; i++) {
2527         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2528       }
2529     }
2530   }
2531 
2532   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2533   ierr = PetscFree(work);CHKERRQ(ierr);
2534   PetscFunctionReturn(0);
2535 }
2536 
2537 #undef __FUNCT__
2538 #define __FUNCT__ "MatSOR_MPISBAIJ"
2539 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2540 {
2541   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2542   PetscErrorCode    ierr;
2543   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2544   PetscScalar       *x,*ptr,*from;
2545   Vec               bb1;
2546   const PetscScalar *b;
2547 
2548   PetscFunctionBegin;
2549   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2550   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2551 
2552   if (flag == SOR_APPLY_UPPER) {
2553     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2554     PetscFunctionReturn(0);
2555   }
2556 
2557   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2558     if (flag & SOR_ZERO_INITIAL_GUESS) {
2559       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2560       its--;
2561     }
2562 
2563     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2564     while (its--) {
2565 
2566       /* lower triangular part: slvec0b = - B^T*xx */
2567       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2568 
2569       /* copy xx into slvec0a */
2570       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2571       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2572       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2573       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2574 
2575       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2576 
2577       /* copy bb into slvec1a */
2578       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2579       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2580       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2581       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2582 
2583       /* set slvec1b = 0 */
2584       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2585 
2586       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2587       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2588       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2589       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2590 
2591       /* upper triangular part: bb1 = bb1 - B*x */
2592       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2593 
2594       /* local diagonal sweep */
2595       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2596     }
2597     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2598   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2599     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2600   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2601     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2602   } else if (flag & SOR_EISENSTAT) {
2603     Vec               xx1;
2604     PetscBool         hasop;
2605     const PetscScalar *diag;
2606     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2607     PetscInt          i,n;
2608 
2609     if (!mat->xx1) {
2610       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2611       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2612     }
2613     xx1 = mat->xx1;
2614     bb1 = mat->bb1;
2615 
2616     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2617 
2618     if (!mat->diag) {
2619       /* this is wrong for same matrix with new nonzero values */
2620       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2621       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2622     }
2623     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2624 
2625     if (hasop) {
2626       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2627       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2628     } else {
2629       /*
2630           These two lines are replaced by code that may be a bit faster for a good compiler
2631       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2632       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2633       */
2634       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2635       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2636       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2637       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2638       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2639       if (omega == 1.0) {
2640         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2641         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2642       } else {
2643         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2644         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2645       }
2646       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2647       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2648       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2649       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2650     }
2651 
2652     /* multiply off-diagonal portion of matrix */
2653     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2654     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2655     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2656     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2657     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2658     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2659     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2660     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2661     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2662     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2663 
2664     /* local sweep */
2665     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2666     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2667   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2668   PetscFunctionReturn(0);
2669 }
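/*
   Note: MatSOR_MPISBAIJ() is normally reached through the SOR and Eisenstat
   preconditioners rather than called directly.  As a rough usage sketch (the
   executable name ./ex is a placeholder), a run such as

       mpiexec -n 2 ./ex -mat_type mpisbaij -pc_type sor -pc_sor_local_symmetric

   exercises the SOR_LOCAL_SYMMETRIC_SWEEP branch above, while -pc_type eisenstat
   exercises the SOR_EISENSTAT branch; the block size must be 1 in either case.
*/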
2670 
2671 #undef __FUNCT__
2672 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2673 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2674 {
2675   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2676   PetscErrorCode ierr;
2677   Vec            lvec1,bb1;
2678 
2679   PetscFunctionBegin;
2680   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2681   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2682 
2683   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2684     if (flag & SOR_ZERO_INITIAL_GUESS) {
2685       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2686       its--;
2687     }
2688 
2689     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2690     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2691     while (its--) {
2692       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2693 
2694       /* lower diagonal part: bb1 = bb - B^T*xx */
2695       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2696       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2697 
2698       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2699       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2700       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2701 
2702       /* upper diagonal part: bb1 = bb1 - B*x */
2703       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2704       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2705 
2706       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2707 
2708       /* diagonal sweep */
2709       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2710     }
2711     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2712     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2713   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2714   PetscFunctionReturn(0);
2715 }
2716 
2717 #undef __FUNCT__
2718 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2719 /*@
2720      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
2721          rows in standard CSR format.
2722 
2723    Collective on MPI_Comm
2724 
2725    Input Parameters:
2726 +  comm - MPI communicator
2727 .  bs - the block size; only a block size of 1 is supported
2728 .  m - number of local rows (Cannot be PETSC_DECIDE)
2729 .  n - This value should be the same as the local size used in creating the
2730        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2731        calculated if N is given). For square matrices n is almost always m.
2732 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2733 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2734 .   i - row indices
2735 .   j - column indices
2736 -   a - matrix values
2737 
2738    Output Parameter:
2739 .   mat - the matrix
2740 
2741    Level: intermediate
2742 
2743    Notes:
2744        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2745      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2746      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2747 
2748        The i and j indices are 0 based; the entries of i are offsets into the local j array (not global offsets).
2749 
2750 .keywords: matrix, aij, compressed row, sparse, parallel
2751 
2752 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2753           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2754 @*/
2755 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2756 {
2757   PetscErrorCode ierr;
2758 
2759 
2760   PetscFunctionBegin;
2761   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2762   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE or negative");
2763   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2764   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2765   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2766   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2767   PetscFunctionReturn(0);
2768 }
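/*
   Usage sketch for MatCreateMPISBAIJWithArrays(): builds a distributed tridiagonal
   matrix, with each process supplying only the diagonal and upper-triangular entries
   of its two locally owned rows in CSR form.  The helper name, the two-rows-per-process
   layout, and the numerical values are illustrative assumptions; bs must be 1.
*/
#if 0 /* illustrative only, not compiled */
static PetscErrorCode ExampleCreateTridiagSBAIJ(MPI_Comm comm,Mat *C)
{
  PetscErrorCode ierr;
  PetscMPIInt    rank,size;
  PetscInt       m = 2,M,rstart,row,k,nz = 0;
  PetscInt       i[3],j[4];          /* m+1 row offsets; at most 2 entries per row */
  PetscScalar    a[4];

  PetscFunctionBegin;
  ierr   = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr   = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  M      = m*size;                                        /* global number of rows */
  rstart = m*rank;                                        /* first locally owned global row */
  i[0]   = 0;
  for (k=0; k<m; k++) {
    row   = rstart + k;
    j[nz] = row;   a[nz] = 2.0; nz++;                     /* diagonal entry */
    if (row+1 < M) {j[nz] = row+1; a[nz] = -1.0; nz++;}   /* superdiagonal entry */
    i[k+1] = nz;                                          /* row offsets into j[] */
  }
  ierr = MatCreateMPISBAIJWithArrays(comm,1,m,m,M,M,i,j,a,C);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif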
2769 
2770 
2771 #undef __FUNCT__
2772 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2773 /*@C
2774    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2775    using a CSR (compressed row) description of its local rows.
2776 
2777    Collective on MPI_Comm
2778 
2779    Input Parameters:
2780 +  A - the matrix
2781 .  bs - the block size
2782 .  i - the indices into j for the start of each local row (starts with zero)
2783 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2784 -  v - optional values in the matrix
2785 
2786    Level: developer
2787 
2788 .keywords: matrix, aij, compressed row, sparse, parallel
2789 
2790 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2791 @*/
2792 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2793 {
2794   PetscErrorCode ierr;
2795 
2796   PetscFunctionBegin;
2797   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
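/*
   Usage sketch for MatMPISBAIJSetPreallocationCSR(): the typical calling sequence,
   shown for a run on a single process with a 2x2 matrix, supplying only the
   upper-triangular entries with bs = 1.  The variable names and values are
   illustrative assumptions.

      PetscErrorCode ierr;
      Mat            B;
      PetscInt       i[] = {0,2,3};            row offsets into j[]
      PetscInt       j[] = {0,1,1};            column indices, upper triangle only
      PetscScalar    v[] = {4.0,-1.0,4.0};

      ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
      ierr = MatSetSizes(B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
*/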
2800 
2801 
2802