xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision eb3f19e46b38d5d7d03c8a3cfecdfb705cfcba06)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
9 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
10 {
11   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
12   PetscErrorCode ierr;
13 
14   PetscFunctionBegin;
15   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
16   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
22 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
23 {
24   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
25   PetscErrorCode ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
29   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
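
/* These simply delegate to the sequential MatStoreValues()/MatRetrieveValues()
   on the diagonal (A) and off-diagonal (B) blocks.  A typical use, as a sketch,
   once the nonzero pattern has been frozen:

     ierr = MatSetOption(mat,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ... modify entries with MatSetValues() and reassemble ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/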
32 
33 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
34   { \
35  \
36     brow = row/bs;  \
37     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
38     rmax = aimax[brow]; nrow = ailen[brow]; \
39     bcol = col/bs; \
40     ridx = row % bs; cidx = col % bs; \
41     low  = 0; high = nrow; \
42     while (high-low > 3) { \
43       t = (low+high)/2; \
44       if (rp[t] > bcol) high = t; \
45       else              low  = t; \
46     } \
47     for (_i=low; _i<high; _i++) { \
48       if (rp[_i] > bcol) break; \
49       if (rp[_i] == bcol) { \
50         bap = ap + bs2*_i + bs*cidx + ridx; \
51         if (addv == ADD_VALUES) *bap += value;  \
52         else                    *bap  = value;  \
53         goto a_noinsert; \
54       } \
55     } \
56     if (a->nonew == 1) goto a_noinsert; \
57     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
59     N = nrow++ - 1;  \
60     /* shift up all the later entries in this row */ \
61     for (ii=N; ii>=_i; ii--) { \
62       rp[ii+1] = rp[ii]; \
63       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
64     } \
65     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
66     rp[_i]                      = bcol;  \
67     ap[bs2*_i + bs*cidx + ridx] = value;  \
68     A->nonzerostate++;\
69 a_noinsert:; \
70     ailen[brow] = nrow; \
71   }
72 
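/* MatSetValues_SeqSBAIJ_B_Private below applies the same algorithm to the
   off-diagonal block B: a bisection on the sorted block-column indices rp[]
   narrows the search window to at most 3 entries, a linear scan finishes it,
   and on a miss the trailing entries (and their bs2-sized value blocks) are
   shifted up one slot to make room for the new block column. */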
73 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
74   { \
75     brow = row/bs;  \
76     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
77     rmax = bimax[brow]; nrow = bilen[brow]; \
78     bcol = col/bs; \
79     ridx = row % bs; cidx = col % bs; \
80     low  = 0; high = nrow; \
81     while (high-low > 3) { \
82       t = (low+high)/2; \
83       if (rp[t] > bcol) high = t; \
84       else              low  = t; \
85     } \
86     for (_i=low; _i<high; _i++) { \
87       if (rp[_i] > bcol) break; \
88       if (rp[_i] == bcol) { \
89         bap = ap + bs2*_i + bs*cidx + ridx; \
90         if (addv == ADD_VALUES) *bap += value;  \
91         else                    *bap  = value;  \
92         goto b_noinsert; \
93       } \
94     } \
95     if (b->nonew == 1) goto b_noinsert; \
96     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
97     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
98     N = nrow++ - 1;  \
99     /* shift up all the later entries in this row */ \
100     for (ii=N; ii>=_i; ii--) { \
101       rp[ii+1] = rp[ii]; \
102       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
103     } \
104     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
105     rp[_i]                      = bcol;  \
106     ap[bs2*_i + bs*cidx + ridx] = value;  \
107     B->nonzerostate++;\
108 b_noinsert:; \
109     bilen[brow] = nrow; \
110   }
111 
112 /* Only add/insert a(i,j) with i<=j (blocks).
113    Any a(i,j) with i>j input by the user is ignored.
114 */
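
/* For example, with bs = 1 a call such as

     PetscInt    i = 3, j = 1;
     PetscScalar v = 2.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);

   targets the lower triangle (i > j): it is silently dropped when
   MAT_IGNORE_LOWER_TRIANGULAR is set and raises an error otherwise. */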
115 #undef __FUNCT__
116 #define __FUNCT__ "MatSetValues_MPISBAIJ"
117 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
118 {
119   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
120   MatScalar      value;
121   PetscBool      roworiented = baij->roworiented;
122   PetscErrorCode ierr;
123   PetscInt       i,j,row,col;
124   PetscInt       rstart_orig=mat->rmap->rstart;
125   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
126   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
127 
128   /* Some variables required in the macros below */
129   Mat          A     = baij->A;
130   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
131   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
132   MatScalar    *aa   =a->a;
133 
134   Mat         B     = baij->B;
135   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
136   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
137   MatScalar   *ba   =b->a;
138 
139   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
140   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
141   MatScalar *ap,*bap;
142 
143   /* for stash */
144   PetscInt  n_loc, *in_loc = NULL;
145   MatScalar *v_loc = NULL;
146 
147   PetscFunctionBegin;
148   if (!baij->donotstash) {
149     if (n > baij->n_loc) {
150       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
151       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
152       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
153       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
154 
155       baij->n_loc = n;
156     }
157     in_loc = baij->in_loc;
158     v_loc  = baij->v_loc;
159   }
160 
161   for (i=0; i<m; i++) {
162     if (im[i] < 0) continue;
163 #if defined(PETSC_USE_DEBUG)
164     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
165 #endif
166     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
167       row = im[i] - rstart_orig;              /* local row index */
168       for (j=0; j<n; j++) {
169         if (im[i]/bs > in[j]/bs) {
170           if (a->ignore_ltriangular) {
171             continue;    /* ignore lower triangular blocks */
172         } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
173         }
174         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
175           col  = in[j] - cstart_orig;         /* local col index */
176           brow = row/bs; bcol = col/bs;
177           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
178           if (roworiented) value = v[i*n+j];
179           else             value = v[i+j*m];
180           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
181           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
182         } else if (in[j] < 0) continue;
183 #if defined(PETSC_USE_DEBUG)
184         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
185 #endif
186         else {  /* off-diag entry (B) */
187           if (mat->was_assembled) {
188             if (!baij->colmap) {
189               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
190             }
191 #if defined(PETSC_USE_CTABLE)
192             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
193             col  = col - 1;
194 #else
195             col = baij->colmap[in[j]/bs] - 1;
196 #endif
197             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
198               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
199               col  =  in[j];
200               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
201               B    = baij->B;
202               b    = (Mat_SeqBAIJ*)(B)->data;
203               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
204               ba   = b->a;
205             } else col += in[j]%bs;
206           } else col = in[j];
207           if (roworiented) value = v[i*n+j];
208           else             value = v[i+j*m];
209           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
210           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
211         }
212       }
213     } else {  /* off processor entry */
214       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
215       if (!baij->donotstash) {
216         mat->assembled = PETSC_FALSE;
217         n_loc          = 0;
218         for (j=0; j<n; j++) {
219           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
220           in_loc[n_loc] = in[j];
221           if (roworiented) {
222             v_loc[n_loc] = v[i*n+j];
223           } else {
224             v_loc[n_loc] = v[j*m+i];
225           }
226           n_loc++;
227         }
228         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
229       }
230     }
231   }
232   PetscFunctionReturn(0);
233 }
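
/* Rows owned by other processes are not set directly above: their column
   indices and values are packed into mat->stash and only communicated and
   applied during MatAssemblyBegin_MPISBAIJ()/MatAssemblyEnd_MPISBAIJ(). */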
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
237 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
238 {
239   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
240   const MatScalar *value;
241   MatScalar       *barray     =baij->barray;
242   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
243   PetscErrorCode  ierr;
244   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
245   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
246   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
247 
248   PetscFunctionBegin;
249   if (!barray) {
250     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
251     baij->barray = barray;
252   }
253 
254   if (roworiented) {
255     stepval = (n-1)*bs;
256   } else {
257     stepval = (m-1)*bs;
258   }
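  /* stepval is the number of scalars to skip in the caller's array v to move
     from the end of one bs-long segment of a block to the start of that
     block's next segment; v holds m*bs by n*bs values laid out row- or
     column-major according to roworiented */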
259   for (i=0; i<m; i++) {
260     if (im[i] < 0) continue;
261 #if defined(PETSC_USE_DEBUG)
262     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
263 #endif
264     if (im[i] >= rstart && im[i] < rend) {
265       row = im[i] - rstart;
266       for (j=0; j<n; j++) {
267         if (im[i] > in[j]) {
268           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
269           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
270         }
271         /* if only one block column (row-oriented) or one block row (column-oriented) is set, no copy is required */
272         if ((roworiented) && (n == 1)) {
273           barray = (MatScalar*) v + i*bs2;
274         } else if ((!roworiented) && (m == 1)) {
275           barray = (MatScalar*) v + j*bs2;
276         } else { /* Here a copy is required */
277           if (roworiented) {
278             value = v + i*(stepval+bs)*bs + j*bs;
279           } else {
280             value = v + j*(stepval+bs)*bs + i*bs;
281           }
282           for (ii=0; ii<bs; ii++,value+=stepval) {
283             for (jj=0; jj<bs; jj++) {
284               *barray++ = *value++;
285             }
286           }
287           barray -=bs2;
288         }
289 
290         if (in[j] >= cstart && in[j] < cend) {
291           col  = in[j] - cstart;
292           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
293         } else if (in[j] < 0) continue;
294 #if defined(PETSC_USE_DEBUG)
295         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
296 #endif
297         else {
298           if (mat->was_assembled) {
299             if (!baij->colmap) {
300               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
301             }
302 
303 #if defined(PETSC_USE_DEBUG)
304 #if defined(PETSC_USE_CTABLE)
305             { PetscInt data;
306               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
307               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
308             }
309 #else
310             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
311 #endif
312 #endif
313 #if defined(PETSC_USE_CTABLE)
314             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
315             col  = (col - 1)/bs;
316 #else
317             col = (baij->colmap[in[j]] - 1)/bs;
318 #endif
319             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
320               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
321               col  = in[j];
322             }
323           } else col = in[j];
324           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
325         }
326       }
327     } else {
328       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
329       if (!baij->donotstash) {
330         if (roworiented) {
331           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
332         } else {
333           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
334         }
335       }
336     }
337   }
338   PetscFunctionReturn(0);
339 }
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "MatGetValues_MPISBAIJ"
343 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
344 {
345   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
346   PetscErrorCode ierr;
347   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
348   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
349 
350   PetscFunctionBegin;
351   for (i=0; i<m; i++) {
352     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
353     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
354     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
355       row = idxm[i] - bsrstart;
356       for (j=0; j<n; j++) {
357         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
358         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
359         if (idxn[j] >= bscstart && idxn[j] < bscend) {
360           col  = idxn[j] - bscstart;
361           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
362         } else {
363           if (!baij->colmap) {
364             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
365           }
366 #if defined(PETSC_USE_CTABLE)
367           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
368           data--;
369 #else
370           data = baij->colmap[idxn[j]/bs]-1;
371 #endif
372           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
373           else {
374             col  = data + idxn[j]%bs;
375             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
376           }
377         }
378       }
379     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
380   }
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "MatNorm_MPISBAIJ"
386 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
387 {
388   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
389   PetscErrorCode ierr;
390   PetscReal      sum[2],*lnorm2;
391 
392   PetscFunctionBegin;
393   if (baij->size == 1) {
394     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
395   } else {
396     if (type == NORM_FROBENIUS) {
397       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
398       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
399       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
400       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
401       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
402       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
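      /* sum[0] is ||A||_F^2 of the diagonal blocks and sum[1] is ||B||_F^2 of
         the stored off-diagonal blocks; the symmetric counterparts of the B
         entries are stored on no process, hence the factor of 2 below */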
403       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
404       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
405     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
406       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
407       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
408       PetscReal    *rsum,*rsum2,vabs;
409       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
410       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
411       MatScalar    *v;
412 
413       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
414       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
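      /* by symmetry the 1-norm and infinity-norm coincide, so one pass adds
         each stored entry to its column sum and, for off-diagonal entries,
         to the column sum of its unstored transpose as well */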
415       /* Amat */
416       v = amat->a; jj = amat->j;
417       for (brow=0; brow<mbs; brow++) {
418         grow = bs*(rstart + brow);
419         nz   = amat->i[brow+1] - amat->i[brow];
420         for (bcol=0; bcol<nz; bcol++) {
421           gcol = bs*(rstart + *jj); jj++;
422           for (col=0; col<bs; col++) {
423             for (row=0; row<bs; row++) {
424               vabs            = PetscAbsScalar(*v); v++;
425               rsum[gcol+col] += vabs;
426               /* non-diagonal block */
427               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
428             }
429           }
430         }
431       }
432       /* Bmat */
433       v = bmat->a; jj = bmat->j;
434       for (brow=0; brow<mbs; brow++) {
435         grow = bs*(rstart + brow);
436         nz = bmat->i[brow+1] - bmat->i[brow];
437         for (bcol=0; bcol<nz; bcol++) {
438           gcol = bs*garray[*jj]; jj++;
439           for (col=0; col<bs; col++) {
440             for (row=0; row<bs; row++) {
441               vabs            = PetscAbsScalar(*v); v++;
442               rsum[gcol+col] += vabs;
443               rsum[grow+row] += vabs;
444             }
445           }
446         }
447       }
448       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
449       *norm = 0.0;
450       for (col=0; col<mat->cmap->N; col++) {
451         if (rsum2[col] > *norm) *norm = rsum2[col];
452       }
453       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
454     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
455   }
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
461 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
462 {
463   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
464   PetscErrorCode ierr;
465   PetscInt       nstash,reallocs;
466   InsertMode     addv;
467 
468   PetscFunctionBegin;
469   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
470 
471   /* make sure all processors are either in INSERTMODE or ADDMODE */
472   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
473   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
474   mat->insertmode = addv; /* in case this processor had no cache */
475 
476   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
477   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
478   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
479   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
480   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
481   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
487 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
488 {
489   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
490   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
491   PetscErrorCode ierr;
492   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
493   PetscInt       *row,*col;
494   PetscBool      other_disassembled;
495   PetscMPIInt    n;
496   PetscBool      r1,r2,r3;
497   MatScalar      *val;
498   InsertMode     addv = mat->insertmode;
499 
500   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
501   PetscFunctionBegin;
502   if (!baij->donotstash &&  !mat->nooffprocentries) {
503     while (1) {
504       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
505       if (!flg) break;
506 
507       for (i=0; i<n;) {
508         /* Now identify the consecutive vals belonging to the same row */
509         for (j=i,rstart=row[j]; j<n; j++) {
510           if (row[j] != rstart) break;
511         }
512         if (j < n) ncols = j-i;
513         else       ncols = n-i;
514         /* Now assemble all these values with a single function call */
515         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
516         i    = j;
517       }
518     }
519     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
520     /* Now process the block-stash. Since the values are stashed column-oriented,
521        temporarily clear the roworiented flags and restore the original flags
522        after the MatSetValuesBlocked() calls */
523     r1 = baij->roworiented;
524     r2 = a->roworiented;
525     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
526 
527     baij->roworiented = PETSC_FALSE;
528     a->roworiented    = PETSC_FALSE;
529 
530     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
531     while (1) {
532       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
533       if (!flg) break;
534 
535       for (i=0; i<n;) {
536         /* Now identify the consecutive vals belonging to the same row */
537         for (j=i,rstart=row[j]; j<n; j++) {
538           if (row[j] != rstart) break;
539         }
540         if (j < n) ncols = j-i;
541         else       ncols = n-i;
542         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
543         i    = j;
544       }
545     }
546     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
547 
548     baij->roworiented = r1;
549     a->roworiented    = r2;
550 
551     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
552   }
553 
554   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
555   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
556 
557   /* determine if any processor has disassembled, if so we must
558      also disassemble ourselves, in order that we may reassemble. */
559   /*
560      if nonzero structure of submatrix B cannot change then we know that
561      no processor disassembled thus we can skip this stuff
562   */
563   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
564     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
565     if (mat->was_assembled && !other_disassembled) {
566       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
567     }
568   }
569 
570   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
571     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
572   }
573   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
575 
576   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
577 
578   baij->rowvalues = 0;
579 
580   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
581   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
582     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
583     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
589 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
590 #include <petscdraw.h>
591 #undef __FUNCT__
592 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
593 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
594 {
595   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
596   PetscErrorCode    ierr;
597   PetscInt          bs   = mat->rmap->bs;
598   PetscMPIInt       rank = baij->rank;
599   PetscBool         iascii,isdraw;
600   PetscViewer       sviewer;
601   PetscViewerFormat format;
602 
603   PetscFunctionBegin;
604   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
605   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
606   if (iascii) {
607     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
608     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
609       MatInfo info;
610       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
611       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
612       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
613       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
614       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
615       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
616       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
617       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
618       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
619       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
620       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
621       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
622       PetscFunctionReturn(0);
623     } else if (format == PETSC_VIEWER_ASCII_INFO) {
624       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
625       PetscFunctionReturn(0);
626     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
627       PetscFunctionReturn(0);
628     }
629   }
630 
631   if (isdraw) {
632     PetscDraw draw;
633     PetscBool isnull;
634     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
635     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
636   }
637 
638   {
639     /* assemble the entire matrix onto first processor. */
640     Mat          A;
641     Mat_SeqSBAIJ *Aloc;
642     Mat_SeqBAIJ  *Bloc;
643     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
644     MatScalar    *a;
645     const char   *matname;
646 
647     /* Should this be the same type as mat? */
648     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
649     if (!rank) {
650       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
651     } else {
652       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
653     }
654     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
655     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
656     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
657     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
658 
659     /* copy over the A part */
660     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
661     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
662     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
663 
664     for (i=0; i<mbs; i++) {
665       rvals[0] = bs*(baij->rstartbs + i);
666       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
667       for (j=ai[i]; j<ai[i+1]; j++) {
668         col = (baij->cstartbs+aj[j])*bs;
669         for (k=0; k<bs; k++) {
670           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
671           col++;
672           a += bs;
673         }
674       }
675     }
676     /* copy over the B part */
677     Bloc = (Mat_SeqBAIJ*)baij->B->data;
678     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
679     for (i=0; i<mbs; i++) {
680 
681       rvals[0] = bs*(baij->rstartbs + i);
682       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
683       for (j=ai[i]; j<ai[i+1]; j++) {
684         col = baij->garray[aj[j]]*bs;
685         for (k=0; k<bs; k++) {
686           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
687           col++;
688           a += bs;
689         }
690       }
691     }
692     ierr = PetscFree(rvals);CHKERRQ(ierr);
693     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
695     /*
696        Everyone has to call to draw the matrix since the graphics waits are
697        synchronized across all processors that share the PetscDraw object
698     */
699     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
700     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
701     if (!rank) {
702       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
703       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
704     }
705     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
706     ierr = MatDestroy(&A);CHKERRQ(ierr);
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatView_MPISBAIJ"
713 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
714 {
715   PetscErrorCode ierr;
716   PetscBool      iascii,isdraw,issocket,isbinary;
717 
718   PetscFunctionBegin;
719   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
720   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
722   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
723   if (iascii || isdraw || issocket || isbinary) {
724     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
725   }
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatDestroy_MPISBAIJ"
731 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
732 {
733   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737 #if defined(PETSC_USE_LOG)
738   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
739 #endif
740   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
741   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
742   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
743   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
744 #if defined(PETSC_USE_CTABLE)
745   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
746 #else
747   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
748 #endif
749   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
750   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
751   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
752   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
753   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
754   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
755   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
756   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
757   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
758   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
759   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
760   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
761   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
762   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
763   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
764 #if defined(PETSC_USE_REAL_MAT_SINGLE)
765   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
766 #endif
767   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
768   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
769   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
770   ierr = PetscFree(mat->data);CHKERRQ(ierr);
771 
772   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
773   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
774   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
775   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
776   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
777   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
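/* MatMult for MPISBAIJ: only the upper triangle is stored, so y = A*x needs
   B^T (B^H in the Hermitian case) applied to the local part of x in addition
   to B applied to the ghost values.  The slvec0/slvec1 scheme packs
   [x ; B^T*x] into the single vector slvec0 so that one VecScatter (sMvctx)
   with ADD_VALUES communicates the ghost values and accumulates the
   transpose contributions in the same message. */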
781 #undef __FUNCT__
782 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
783 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
784 {
785   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
786   PetscErrorCode ierr;
787   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
788   PetscScalar    *x,*from;
789 
790   PetscFunctionBegin;
791   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
792   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
793 
794   /* diagonal part */
795   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
796   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
797 
798   /* subdiagonal part */
799   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
800 
801   /* copy x into the vec slvec0 */
802   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
803   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
804 
805   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
806   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
807   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
808 
809   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
810   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
811   /* superdiagonal part */
812   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "MatMult_MPISBAIJ"
818 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
819 {
820   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
821   PetscErrorCode ierr;
822   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
823   PetscScalar    *x,*from;
824 
825   PetscFunctionBegin;
826   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
827   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
828 
829   /* diagonal part */
830   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
831   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
832 
833   /* subdiagonal part */
834   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
835 
836   /* copy x into the vec slvec0 */
837   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
838   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
839 
840   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
841   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
842   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
843 
844   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
845   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
846   /* superdiagonal part */
847   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
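/* Older two-scatter variant: a forward scatter of x feeds the superdiagonal
   product B*lvec, and a reverse scatter with ADD_VALUES returns the
   subdiagonal contribution B^T*x to the owning processes. */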
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
853 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
854 {
855   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
856   PetscErrorCode ierr;
857   PetscInt       nt;
858 
859   PetscFunctionBegin;
860   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
861   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
862 
863   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
864   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
865 
866   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   /* do diagonal part */
868   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
869   /* do superdiagonal part */
870   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
872   /* do subdiagonal part */
873   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
874   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
875   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
881 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
882 {
883   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
884   PetscErrorCode ierr;
885   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
886   PetscScalar    *x,*from,zero=0.0;
887 
888   PetscFunctionBegin;
889   /*
890   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
891   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
892   */
893   /* diagonal part */
894   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
895   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
896 
897   /* subdiagonal part */
898   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
899 
900   /* copy x into the vec slvec0 */
901   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
902   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
903   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
904   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
905 
906   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
908   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909 
910   /* superdiagonal part */
911   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
917 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
918 {
919   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
920   PetscErrorCode ierr;
921 
922   PetscFunctionBegin;
923   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   /* do diagonal part */
925   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
926   /* do superdiagonal part */
927   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
929 
930   /* do subdiagonal part */
931   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
932   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
933   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
934   PetscFunctionReturn(0);
935 }
936 
937 /*
938   This only works correctly for square matrices where the subblock A->A is the
939    diagonal block
940 */
941 #undef __FUNCT__
942 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
943 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
944 {
945   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
946   PetscErrorCode ierr;
947 
948   PetscFunctionBegin;
949   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
950   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatScale_MPISBAIJ"
956 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
957 {
958   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
963   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
964   PetscFunctionReturn(0);
965 }
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatGetRow_MPISBAIJ"
969 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
970 {
971   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
972   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
973   PetscErrorCode ierr;
974   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
975   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
976   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
977 
978   PetscFunctionBegin;
979   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
980   mat->getrowactive = PETSC_TRUE;
981 
982   if (!mat->rowvalues && (idx || v)) {
983     /*
984         allocate enough space to hold information from the longest row.
985     */
986     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
987     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
988     PetscInt     max = 1,mbs = mat->mbs,tmp;
989     for (i=0; i<mbs; i++) {
990       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
991       if (max < tmp) max = tmp;
992     }
993     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
994   }
995 
996   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
997   lrow = row - brstart;  /* local row index */
998 
999   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1000   if (!v)   {pvA = 0; pvB = 0;}
1001   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1002   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1003   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1004   nztot = nzA + nzB;
1005 
1006   cmap = mat->garray;
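  /* garray[k] is the global block column corresponding to local block column k
     of B, used below to convert B's column indices to global indices */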
1007   if (v  || idx) {
1008     if (nztot) {
1009       /* Sort by increasing column numbers, assuming A and B already sorted */
1010       PetscInt imark = -1;
1011       if (v) {
1012         *v = v_p = mat->rowvalues;
1013         for (i=0; i<nzB; i++) {
1014           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1015           else break;
1016         }
1017         imark = i;
1018         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1019         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1020       }
1021       if (idx) {
1022         *idx = idx_p = mat->rowindices;
1023         if (imark > -1) {
1024           for (i=0; i<imark; i++) {
1025             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1026           }
1027         } else {
1028           for (i=0; i<nzB; i++) {
1029             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1030             else break;
1031           }
1032           imark = i;
1033         }
1034         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1035         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1036       }
1037     } else {
1038       if (idx) *idx = 0;
1039       if (v)   *v   = 0;
1040     }
1041   }
1042   *nz  = nztot;
1043   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1044   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1050 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1051 {
1052   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1053 
1054   PetscFunctionBegin;
1055   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1056   baij->getrowactive = PETSC_FALSE;
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1062 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1063 {
1064   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1065   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1066 
1067   PetscFunctionBegin;
1068   aA->getrow_utriangular = PETSC_TRUE;
1069   PetscFunctionReturn(0);
1070 }
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1073 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1074 {
1075   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1076   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1077 
1078   PetscFunctionBegin;
1079   aA->getrow_utriangular = PETSC_FALSE;
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1085 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1086 {
1087   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1092   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1098 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1099 {
1100   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1105   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1111 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1112 {
1113   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1118   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1124 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1125 {
1126   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1127   Mat            A  = a->A,B = a->B;
1128   PetscErrorCode ierr;
1129   PetscReal      isend[5],irecv[5];
1130 
1131   PetscFunctionBegin;
1132   info->block_size = (PetscReal)matin->rmap->bs;
1133 
1134   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1135 
1136   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1137   isend[3] = info->memory;  isend[4] = info->mallocs;
1138 
1139   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1140 
1141   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1142   isend[3] += info->memory;  isend[4] += info->mallocs;
1143   if (flag == MAT_LOCAL) {
1144     info->nz_used      = isend[0];
1145     info->nz_allocated = isend[1];
1146     info->nz_unneeded  = isend[2];
1147     info->memory       = isend[3];
1148     info->mallocs      = isend[4];
1149   } else if (flag == MAT_GLOBAL_MAX) {
1150     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1151 
1152     info->nz_used      = irecv[0];
1153     info->nz_allocated = irecv[1];
1154     info->nz_unneeded  = irecv[2];
1155     info->memory       = irecv[3];
1156     info->mallocs      = irecv[4];
1157   } else if (flag == MAT_GLOBAL_SUM) {
1158     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1159 
1160     info->nz_used      = irecv[0];
1161     info->nz_allocated = irecv[1];
1162     info->nz_unneeded  = irecv[2];
1163     info->memory       = irecv[3];
1164     info->mallocs      = irecv[4];
1165   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1166   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1167   info->fill_ratio_needed = 0;
1168   info->factor_mallocs    = 0;
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1174 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1175 {
1176   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1177   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1178   PetscErrorCode ierr;
1179 
1180   PetscFunctionBegin;
1181   switch (op) {
1182   case MAT_NEW_NONZERO_LOCATIONS:
1183   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1184   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1185   case MAT_KEEP_NONZERO_PATTERN:
1186   case MAT_NEW_NONZERO_LOCATION_ERR:
1187     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1188     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1189     break;
1190   case MAT_ROW_ORIENTED:
1191     a->roworiented = flg;
1192 
1193     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1194     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1195     break;
1196   case MAT_NEW_DIAGONALS:
1197     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1198     break;
1199   case MAT_IGNORE_OFF_PROC_ENTRIES:
1200     a->donotstash = flg;
1201     break;
1202   case MAT_USE_HASH_TABLE:
1203     a->ht_flag = flg;
1204     break;
1205   case MAT_HERMITIAN:
1206     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1207     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1208 
1209     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1210     break;
1211   case MAT_SPD:
1212     A->spd_set = PETSC_TRUE;
1213     A->spd     = flg;
1214     if (flg) {
1215       A->symmetric                  = PETSC_TRUE;
1216       A->structurally_symmetric     = PETSC_TRUE;
1217       A->symmetric_set              = PETSC_TRUE;
1218       A->structurally_symmetric_set = PETSC_TRUE;
1219     }
1220     break;
1221   case MAT_SYMMETRIC:
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     break;
1224   case MAT_STRUCTURALLY_SYMMETRIC:
1225     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1226     break;
1227   case MAT_SYMMETRY_ETERNAL:
1228     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1229     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1230     break;
1231   case MAT_IGNORE_LOWER_TRIANGULAR:
1232     aA->ignore_ltriangular = flg;
1233     break;
1234   case MAT_ERROR_LOWER_TRIANGULAR:
1235     aA->ignore_ltriangular = flg;
1236     break;
1237   case MAT_GETROW_UPPERTRIANGULAR:
1238     aA->getrow_utriangular = flg;
1239     break;
1240   default:
1241     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1248 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1249 {
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
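  /* an SBAIJ matrix is symmetric, so its transpose equals itself and a copy suffices */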
1253   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1254     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1261 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1262 {
1263   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1264   Mat            a     = baij->A, b=baij->B;
1265   PetscErrorCode ierr;
1266   PetscInt       nv,m,n;
1267   PetscBool      flg;
1268 
1269   PetscFunctionBegin;
1270   if (ll != rr) {
1271     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1272     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1273   }
1274   if (!ll) PetscFunctionReturn(0);
1275 
1276   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1277   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %D %D must be the same",m,n);
1278 
1279   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1280   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1281 
1282   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283 
1284   /* left diagonalscale the off-diagonal part */
1285   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1286 
1287   /* scale the diagonal part */
1288   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1289 
1290   /* right diagonalscale the off-diagonal part */
1291   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1292   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1298 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1299 {
1300   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302 
1303   PetscFunctionBegin;
1304   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatEqual_MPISBAIJ"
1312 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1313 {
1314   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1315   Mat            a,b,c,d;
1316   PetscBool      flg;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   a = matA->A; b = matA->B;
1321   c = matB->A; d = matB->B;
1322 
1323   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1324   if (flg) {
1325     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1326   }
1327   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatCopy_MPISBAIJ"
1333 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1334 {
1335   PetscErrorCode ierr;
1336   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1337   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1338 
1339   PetscFunctionBegin;
1340   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1341   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1342     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1343     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1344     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1345   } else {
1346     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1347     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1354 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1355 {
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1365 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1366 {
1367   PetscErrorCode ierr;
1368   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1369   PetscBLASInt   bnz,one=1;
1370   Mat_SeqSBAIJ   *xa,*ya;
1371   Mat_SeqBAIJ    *xb,*yb;
1372 
1373   PetscFunctionBegin;
1374   if (str == SAME_NONZERO_PATTERN) {
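    /* Same nonzero structure: add a*X directly into Y's stored values with BLAS axpy,
       first for the diagonal (SeqSBAIJ) blocks, then for the off-diagonal (SeqBAIJ) blocks */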
1375     PetscScalar alpha = a;
1376     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1377     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1378     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1379     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1380     xb   = (Mat_SeqBAIJ*)xx->B->data;
1381     yb   = (Mat_SeqBAIJ*)yy->B->data;
1382     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1383     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1384     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1385   } else {
1386     Mat      B;
1387     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1388     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1389     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1390     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1391     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1392     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1393     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1394     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1395     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1396     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1397     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1398     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1399     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1400     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1401     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1402     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1403     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1404     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1405     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1406     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1414 {
1415   PetscErrorCode ierr;
1416   PetscInt       i;
1417   PetscBool      flg;
1418 
1419   PetscFunctionBegin;
1420   for (i=0; i<n; i++) {
1421     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1422     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1423   }
1424   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 
1429 /* -------------------------------------------------------------------*/
1430 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1431                                        MatGetRow_MPISBAIJ,
1432                                        MatRestoreRow_MPISBAIJ,
1433                                        MatMult_MPISBAIJ,
1434                                /*  4*/ MatMultAdd_MPISBAIJ,
1435                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1436                                        MatMultAdd_MPISBAIJ,
1437                                        0,
1438                                        0,
1439                                        0,
1440                                /* 10*/ 0,
1441                                        0,
1442                                        0,
1443                                        MatSOR_MPISBAIJ,
1444                                        MatTranspose_MPISBAIJ,
1445                                /* 15*/ MatGetInfo_MPISBAIJ,
1446                                        MatEqual_MPISBAIJ,
1447                                        MatGetDiagonal_MPISBAIJ,
1448                                        MatDiagonalScale_MPISBAIJ,
1449                                        MatNorm_MPISBAIJ,
1450                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1451                                        MatAssemblyEnd_MPISBAIJ,
1452                                        MatSetOption_MPISBAIJ,
1453                                        MatZeroEntries_MPISBAIJ,
1454                                /* 24*/ 0,
1455                                        0,
1456                                        0,
1457                                        0,
1458                                        0,
1459                                /* 29*/ MatSetUp_MPISBAIJ,
1460                                        0,
1461                                        0,
1462                                        0,
1463                                        0,
1464                                /* 34*/ MatDuplicate_MPISBAIJ,
1465                                        0,
1466                                        0,
1467                                        0,
1468                                        0,
1469                                /* 39*/ MatAXPY_MPISBAIJ,
1470                                        MatGetSubMatrices_MPISBAIJ,
1471                                        MatIncreaseOverlap_MPISBAIJ,
1472                                        MatGetValues_MPISBAIJ,
1473                                        MatCopy_MPISBAIJ,
1474                                /* 44*/ 0,
1475                                        MatScale_MPISBAIJ,
1476                                        0,
1477                                        0,
1478                                        0,
1479                                /* 49*/ 0,
1480                                        0,
1481                                        0,
1482                                        0,
1483                                        0,
1484                                /* 54*/ 0,
1485                                        0,
1486                                        MatSetUnfactored_MPISBAIJ,
1487                                        0,
1488                                        MatSetValuesBlocked_MPISBAIJ,
1489                                /* 59*/ 0,
1490                                        0,
1491                                        0,
1492                                        0,
1493                                        0,
1494                                /* 64*/ 0,
1495                                        0,
1496                                        0,
1497                                        0,
1498                                        0,
1499                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1500                                        0,
1501                                        0,
1502                                        0,
1503                                        0,
1504                                /* 74*/ 0,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                        0,
1509                                /* 79*/ 0,
1510                                        0,
1511                                        0,
1512                                        0,
1513                                        MatLoad_MPISBAIJ,
1514                                /* 84*/ 0,
1515                                        0,
1516                                        0,
1517                                        0,
1518                                        0,
1519                                /* 89*/ 0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                        0,
1524                                /* 94*/ 0,
1525                                        0,
1526                                        0,
1527                                        0,
1528                                        0,
1529                                /* 99*/ 0,
1530                                        0,
1531                                        0,
1532                                        0,
1533                                        0,
1534                                /*104*/ 0,
1535                                        MatRealPart_MPISBAIJ,
1536                                        MatImaginaryPart_MPISBAIJ,
1537                                        MatGetRowUpperTriangular_MPISBAIJ,
1538                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1539                                /*109*/ 0,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                        0,
1544                                /*114*/ 0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                        0,
1549                                /*119*/ 0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                        0,
1554                                /*124*/ 0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                        0,
1559                                /*129*/ 0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                        0,
1564                                /*134*/ 0,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                        0,
1569                                /*139*/ 0,
1570                                        0,
1571                                        0
1572 };
1573 
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1577 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1578 {
1579   PetscFunctionBegin;
1580   *a = ((Mat_MPISBAIJ*)A->data)->A;
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1586 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1587 {
1588   Mat_MPISBAIJ   *b;
1589   PetscErrorCode ierr;
1590   PetscInt       i,mbs,Mbs;
1591 
1592   PetscFunctionBegin;
1593   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1594   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1595   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1596   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1597 
1598   b   = (Mat_MPISBAIJ*)B->data;
1599   mbs = B->rmap->n/bs;
1600   Mbs = B->rmap->N/bs;
1601   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1602 
1603   B->rmap->bs = bs;
1604   b->bs2      = bs*bs;
1605   b->mbs      = mbs;
1606   b->nbs      = mbs;
1607   b->Mbs      = Mbs;
1608   b->Nbs      = Mbs;
1609 
1610   for (i=0; i<=b->size; i++) {
1611     b->rangebs[i] = B->rmap->range[i]/bs;
1612   }
1613   b->rstartbs = B->rmap->rstart/bs;
1614   b->rendbs   = B->rmap->rend/bs;
1615 
1616   b->cstartbs = B->cmap->rstart/bs;
1617   b->cendbs   = B->cmap->rend/bs;
1618 
1619   if (!B->preallocated) {
1620     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1621     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1622     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1623     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1624     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1625     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1626     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1627     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1628     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1629   }
1630 
1631   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1632   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1633 
1634   B->preallocated = PETSC_TRUE;
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1640 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1641 {
1642   PetscInt       m,rstart,cstart,cend;
1643   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1644   const PetscInt *JJ    =0;
1645   PetscScalar    *values=0;
1646   PetscErrorCode ierr;
1647 
1648   PetscFunctionBegin;
1649   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1650   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1651   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1652   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1653   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1654   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1655   m      = B->rmap->n/bs;
1656   rstart = B->rmap->rstart/bs;
1657   cstart = B->cmap->rstart/bs;
1658   cend   = B->cmap->rend/bs;
1659 
1660   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1661   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
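  /* For each block row, count the column entries that fall inside the diagonal block
     [cstart,cend) versus outside it; this split assumes each row's indices in jj[] are sorted */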
1662   for (i=0; i<m; i++) {
1663     nz = ii[i+1] - ii[i];
1664     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1665     nz_max = PetscMax(nz_max,nz);
1666     JJ     = jj + ii[i];
1667     for (j=0; j<nz; j++) {
1668       if (*JJ >= cstart) break;
1669       JJ++;
1670     }
1671     d = 0;
1672     for (; j<nz; j++) {
1673       if (*JJ++ >= cend) break;
1674       d++;
1675     }
1676     d_nnz[i] = d;
1677     o_nnz[i] = nz - d;
1678   }
1679   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1680   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1681 
1682   values = (PetscScalar*)V;
1683   if (!values) {
1684     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1685     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1686   }
1687   for (i=0; i<m; i++) {
1688     PetscInt          row    = i + rstart;
1689     PetscInt          ncols  = ii[i+1] - ii[i];
1690     const PetscInt    *icols = jj + ii[i];
1691     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1692     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1693   }
1694 
1695   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1696   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #if defined(PETSC_HAVE_MUMPS)
1703 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1704 #endif
1705 #if defined(PETSC_HAVE_PASTIX)
1706 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1707 #endif
1708 
1709 /*MC
1710    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1711    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1712    the matrix is stored.
1713 
1714   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric
1715   call MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);
1716 
1717    Options Database Keys:
1718 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1719 
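   Example of creating one (a minimal sketch; comm, M, bs, and the preallocation counts are illustrative):
.vb
   Mat A;
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,5,NULL,2,NULL);CHKERRQ(ierr);
.ve
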
1720   Level: beginner
1721 
1722 .seealso: MatCreateSBAIJ()
1723 M*/
1724 
1725 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatCreate_MPISBAIJ"
1729 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1730 {
1731   Mat_MPISBAIJ   *b;
1732   PetscErrorCode ierr;
1733   PetscBool      flg;
1734 
1735   PetscFunctionBegin;
1736   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1737   B->data = (void*)b;
1738   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1739 
1740   B->ops->destroy = MatDestroy_MPISBAIJ;
1741   B->ops->view    = MatView_MPISBAIJ;
1742   B->assembled    = PETSC_FALSE;
1743   B->insertmode   = NOT_SET_VALUES;
1744 
1745   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1746   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1747 
1748   /* build local table of row and column ownerships */
1749   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1750 
1751   /* build cache for off array entries formed */
1752   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1753 
1754   b->donotstash  = PETSC_FALSE;
1755   b->colmap      = NULL;
1756   b->garray      = NULL;
1757   b->roworiented = PETSC_TRUE;
1758 
1759   /* stuff used in block assembly */
1760   b->barray = 0;
1761 
1762   /* stuff used for matrix vector multiply */
1763   b->lvec    = 0;
1764   b->Mvctx   = 0;
1765   b->slvec0  = 0;
1766   b->slvec0b = 0;
1767   b->slvec1  = 0;
1768   b->slvec1a = 0;
1769   b->slvec1b = 0;
1770   b->sMvctx  = 0;
1771 
1772   /* stuff for MatGetRow() */
1773   b->rowindices   = 0;
1774   b->rowvalues    = 0;
1775   b->getrowactive = PETSC_FALSE;
1776 
1777   /* hash table stuff */
1778   b->ht           = 0;
1779   b->hd           = 0;
1780   b->ht_size      = 0;
1781   b->ht_flag      = PETSC_FALSE;
1782   b->ht_fact      = 0;
1783   b->ht_total_ct  = 0;
1784   b->ht_insert_ct = 0;
1785 
1786   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1787   b->ijonly = PETSC_FALSE;
1788 
1789   b->in_loc = 0;
1790   b->v_loc  = 0;
1791   b->n_loc  = 0;
1792   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1793   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1794   if (flg) {
1795     PetscReal fact = 1.39;
1796     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1798     if (fact <= 1.0) fact = 1.39;
1799     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1800     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1801   }
1802   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1803 
1804 #if defined(PETSC_HAVE_PASTIX)
1805   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1806 #endif
1807 #if defined(PETSC_HAVE_MUMPS)
1808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1809 #endif
1810   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1816 
1817   B->symmetric                  = PETSC_TRUE;
1818   B->structurally_symmetric     = PETSC_TRUE;
1819   B->symmetric_set              = PETSC_TRUE;
1820   B->structurally_symmetric_set = PETSC_TRUE;
1821 
1822   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 /*MC
1827    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1828 
1829    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1830    and MATMPISBAIJ otherwise.
1831 
1832    Options Database Keys:
1833 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1834 
1835   Level: beginner
1836 
1837 .seealso: MatCreateSBAIJ(),MATSEQSBAIJ,MATMPISBAIJ
1838 M*/
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1842 /*@C
1843    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1844    the user should preallocate the matrix storage by setting the parameters
1845    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1846    performance can be increased by more than a factor of 50.
1847 
1848    Collective on Mat
1849 
1850    Input Parameters:
1851 +  B - the matrix
1852 .  bs   - size of block
1853 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1854            submatrix  (same for all local rows)
1855 .  d_nnz - array containing the number of block nonzeros in the various block rows
1856            in the upper triangular and diagonal part of the diagonal portion of the local
1857            submatrix (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave
1858            room for the diagonal entry and set a value even if it is zero.
1859 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1860            submatrix (same for all local rows).
1861 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1862            off-diagonal portion of the local submatrix that is right of the diagonal
1863            (possibly different for each block row) or NULL.
1864 
1865 
1866    Options Database Keys:
1867 +   -mat_no_unroll - uses code that does not unroll the loops in the
1868                      block calculations (much slower)
1869 -   -mat_block_size - size of the blocks to use
1870 
1871    Notes:
1872 
1873    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1874    then it must be used on all processors that share the object for that argument.
1875 
1876    If the *_nnz parameter is given then the *_nz parameter is ignored
1877 
1878    Storage Information:
1879    For a square global matrix we define each processor's diagonal portion
1880    to be its local rows and the corresponding columns (a square submatrix);
1881    each processor's off-diagonal portion encompasses the remainder of the
1882    local matrix (a rectangular submatrix).
1883 
1884    The user can specify preallocated storage for the diagonal part of
1885    the local submatrix with either d_nz or d_nnz (not both).  Set
1886    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1887    memory allocation.  Likewise, specify preallocated storage for the
1888    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1889 
1890    You can call MatGetInfo() to get information on how effective the preallocation was;
1891    for example, the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1892    You can also run with the option -info and look for messages containing the string
1893    malloc to see if additional memory allocation was needed.
1894 
1895    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1896    the figure below we depict these three local rows and all columns (0-11).
1897 
1898 .vb
1899            0 1 2 3 4 5 6 7 8 9 10 11
1900           --------------------------
1901    row 3  |. . . d d d o o o o  o  o
1902    row 4  |. . . d d d o o o o  o  o
1903    row 5  |. . . d d d o o o o  o  o
1904           --------------------------
1905 .ve
1906 
1907    Thus, any entries in the d locations are stored in the d (diagonal)
1908    submatrix, and any entries in the o locations are stored in the
1909    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1910    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1911 
1912    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1913    plus the diagonal part of the d matrix,
1914    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1915 
1916    In general, for PDE problems in which most nonzeros are near the diagonal,
1917    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1918    or you will get TERRIBLE performance; see the users' manual chapter on
1919    matrices.
1920 
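   Example (a sketch for the three block rows depicted above with bs=1; for d_nnz only the
   upper triangle of the d part, including the diagonal, is counted):
.vb
   PetscInt d_nnz[] = {3,2,1};  /* rows 3,4,5: upper-triangular entries of the d part */
   PetscInt o_nnz[] = {6,6,6};  /* rows 3,4,5: entries in the o part */
   ierr = MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
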
1921    Level: intermediate
1922 
1923 .keywords: matrix, block, aij, compressed row, sparse, parallel
1924 
1925 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1926 @*/
1927 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1928 {
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1933   PetscValidType(B,1);
1934   PetscValidLogicalCollectiveInt(B,bs,2);
1935   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatCreateSBAIJ"
1941 /*@C
1942    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1943    (block compressed row).  For good matrix assembly performance
1944    the user should preallocate the matrix storage by setting the parameters
1945    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1946    performance can be increased by more than a factor of 50.
1947 
1948    Collective on MPI_Comm
1949 
1950    Input Parameters:
1951 +  comm - MPI communicator
1952 .  bs   - size of block
1953 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1954            This value should be the same as the local size used in creating the
1955            y vector for the matrix-vector product y = Ax.
1956 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1957            This value should be the same as the local size used in creating the
1958            x vector for the matrix-vector product y = Ax.
1959 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1960 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1961 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1962            submatrix  (same for all local rows)
1963 .  d_nnz - array containing the number of block nonzeros in the various block rows
1964            in the upper triangular portion of the diagonal portion of the local
1965            submatrix (possibly different for each block row) or NULL.
1966            If you plan to factor the matrix you must leave room for the diagonal entry and
1967            set its value even if it is zero.
1968 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1969            submatrix (same for all local rows).
1970 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1971            off-diagonal portion of the local submatrix (possibly different for
1972            each block row) or NULL.
1973 
1974    Output Parameter:
1975 .  A - the matrix
1976 
1977    Options Database Keys:
1978 +   -mat_no_unroll - uses code that does not unroll the loops in the
1979                      block calculations (much slower)
1980 .   -mat_block_size - size of the blocks to use
1981 -   -mat_mpi - use the parallel matrix data structures even on one processor
1982                (defaults to using SeqSBAIJ format on one processor)
1983 
1984    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1985    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1986    [MatXXXXSetPreallocation() is, for example, MatSeqSBAIJSetPreallocation()]
1987 
1988    Notes:
1989    The number of rows and columns must be divisible by the blocksize.
1990    This matrix type does not support Hermitian operations for complex numbers.
1991 
1992    The user MUST specify either the local or global matrix dimensions
1993    (possibly both).
1994 
1995    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1996    then it must be used on all processors that share the object for that argument.
1997 
1998    If the *_nnz parameter is given then the *_nz parameter is ignored
1999 
2000    Storage Information:
2001    For a square global matrix we define each processor's diagonal portion
2002    to be its local rows and the corresponding columns (a square submatrix);
2003    each processor's off-diagonal portion encompasses the remainder of the
2004    local matrix (a rectangular submatrix).
2005 
2006    The user can specify preallocated storage for the diagonal part of
2007    the local submatrix with either d_nz or d_nnz (not both).  Set
2008    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2009    memory allocation.  Likewise, specify preallocated storage for the
2010    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2011 
2012    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2013    the figure below we depict these three local rows and all columns (0-11).
2014 
2015 .vb
2016            0 1 2 3 4 5 6 7 8 9 10 11
2017           --------------------------
2018    row 3  |. . . d d d o o o o  o  o
2019    row 4  |. . . d d d o o o o  o  o
2020    row 5  |. . . d d d o o o o  o  o
2021           --------------------------
2022 .ve
2023 
2024    Thus, any entries in the d locations are stored in the d (diagonal)
2025    submatrix, and any entries in the o locations are stored in the
2026    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2027    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2028 
2029    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2030    plus the diagonal part of the d matrix,
2031    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2032    In general, for PDE problems in which most nonzeros are near the diagonal,
2033    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2034    or you will get TERRIBLE performance; see the users' manual chapter on
2035    matrices.
2036 
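   Example (a minimal sketch; the communicator, sizes, and preallocation counts are illustrative):
.vb
   Mat A;
   ierr = MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,5,NULL,2,NULL,&A);CHKERRQ(ierr);
.ve
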
2037    Level: intermediate
2038 
2039 .keywords: matrix, block, aij, compressed row, sparse, parallel
2040 
2041 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2042 @*/
2043 
2044 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2045 {
2046   PetscErrorCode ierr;
2047   PetscMPIInt    size;
2048 
2049   PetscFunctionBegin;
2050   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2051   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2053   if (size > 1) {
2054     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2055     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2056   } else {
2057     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2058     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2059   }
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2066 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2067 {
2068   Mat            mat;
2069   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2070   PetscErrorCode ierr;
2071   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2072   PetscScalar    *array;
2073 
2074   PetscFunctionBegin;
2075   *newmat = 0;
2076 
2077   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2078   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2079   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2080   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2081   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2082   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2083 
2084   mat->factortype   = matin->factortype;
2085   mat->preallocated = PETSC_TRUE;
2086   mat->assembled    = PETSC_TRUE;
2087   mat->insertmode   = NOT_SET_VALUES;
2088 
2089   a      = (Mat_MPISBAIJ*)mat->data;
2090   a->bs2 = oldmat->bs2;
2091   a->mbs = oldmat->mbs;
2092   a->nbs = oldmat->nbs;
2093   a->Mbs = oldmat->Mbs;
2094   a->Nbs = oldmat->Nbs;
2095 
2096 
2097   a->size         = oldmat->size;
2098   a->rank         = oldmat->rank;
2099   a->donotstash   = oldmat->donotstash;
2100   a->roworiented  = oldmat->roworiented;
2101   a->rowindices   = 0;
2102   a->rowvalues    = 0;
2103   a->getrowactive = PETSC_FALSE;
2104   a->barray       = 0;
2105   a->rstartbs     = oldmat->rstartbs;
2106   a->rendbs       = oldmat->rendbs;
2107   a->cstartbs     = oldmat->cstartbs;
2108   a->cendbs       = oldmat->cendbs;
2109 
2110   /* hash table stuff */
2111   a->ht           = 0;
2112   a->hd           = 0;
2113   a->ht_size      = 0;
2114   a->ht_flag      = oldmat->ht_flag;
2115   a->ht_fact      = oldmat->ht_fact;
2116   a->ht_total_ct  = 0;
2117   a->ht_insert_ct = 0;
2118 
2119   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2120   if (oldmat->colmap) {
2121 #if defined(PETSC_USE_CTABLE)
2122     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2123 #else
2124     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2125     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2126     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2127 #endif
2128   } else a->colmap = 0;
2129 
2130   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2131     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2132     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2133     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2134   } else a->garray = 0;
2135 
2136   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2137   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2139   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2140   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2141 
2142   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2144   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2146 
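  /* slvec1's array is laid out as [local part (bs*mbs entries) | ghost part]; create
     slvec1a and slvec1b as sequential subvectors sharing that storage, and slvec0b
     over the ghost part of slvec0 */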
2147   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2148   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2149   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2150   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2151   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2152   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2153   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2154   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2158   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2159   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2160 
2161   /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); is not written yet; instead use the lazy trick of referencing the old scatter: */
2162   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2163   a->sMvctx = oldmat->sMvctx;
2164   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2165 
2166   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2167   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2168   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2169   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2170   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2171   *newmat = mat;
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatLoad_MPISBAIJ"
2177 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2178 {
2179   PetscErrorCode ierr;
2180   PetscInt       i,nz,j,rstart,rend;
2181   PetscScalar    *vals,*buf;
2182   MPI_Comm       comm;
2183   MPI_Status     status;
2184   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2185   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2186   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2187   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2188   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2189   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2190   int            fd;
2191 
2192   PetscFunctionBegin;
2193   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2194   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2195   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2196   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2197   if (bs < 0) bs = 1;
2198 
2199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2200   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2201   if (!rank) {
2202     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2203     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2204     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2205     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2206   }
2207 
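  /* sizesset records whether the user preset any local/global sizes before calling MatLoad() */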
2208   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2209 
2210   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2211   M    = header[1];
2212   N    = header[2];
2213 
2214   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2215   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2216   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2217 
2218   /* If global sizes are set, check if they are consistent with that given in the file */
2219   if (sizesset) {
2220     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2221   }
2222   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2223   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2224 
2225   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2226 
2227   /*
2228      This code adds extra rows to make sure the number of rows is
2229      divisible by the blocksize
2230   */
2231   Mbs        = M/bs;
2232   extra_rows = bs - M + bs*(Mbs);
2233   if (extra_rows == bs) extra_rows = 0;
2234   else                  Mbs++;
2235   if (extra_rows && !rank) {
2236     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2237   }
2238 
2239   /* determine ownership of all rows */
2240   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2241     mbs = Mbs/size + ((Mbs % size) > rank);
2242     m   = mbs*bs;
2243   } else { /* User Set */
2244     m   = newmat->rmap->n;
2245     mbs = m/bs;
2246   }
2247   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2248   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2249   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2250   rowners[0] = 0;
2251   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2252   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2253   rstart = rowners[rank];
2254   rend   = rowners[rank+1];
2255 
2256   /* distribute row lengths to all processors */
2257   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2258   if (!rank) {
2259     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2260     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2261     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2262     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2263     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2264     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2265     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2266   } else {
2267     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2268   }
2269 
2270   if (!rank) {   /* procs[0] */
2271     /* calculate the number of nonzeros on each processor */
2272     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2273     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2274     for (i=0; i<size; i++) {
2275       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2276         procsnz[i] += rowlengths[j];
2277       }
2278     }
2279     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2280 
2281     /* determine max buffer needed and allocate it */
2282     maxnz = 0;
2283     for (i=0; i<size; i++) {
2284       maxnz = PetscMax(maxnz,procsnz[i]);
2285     }
2286     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2287 
2288     /* read in my part of the matrix column indices  */
2289     nz     = procsnz[0];
2290     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2291     mycols = ibuf;
2292     if (size == 1) nz -= extra_rows;
2293     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2294     if (size == 1) {
2295       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2296     }
2297 
2298     /* read in the parts for all the other procs (except the last) and ship them off */
2299     for (i=1; i<size-1; i++) {
2300       nz   = procsnz[i];
2301       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2302       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2303     }
2304     /* read in the stuff for the last proc */
2305     if (size != 1) {
2306       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2307       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2308       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2309       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2310     }
2311     ierr = PetscFree(cols);CHKERRQ(ierr);
2312   } else {  /* procs[i], i>0 */
2313     /* determine buffer space needed for message */
2314     nz = 0;
2315     for (i=0; i<m; i++) nz += locrowlens[i];
2316     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2317     mycols = ibuf;
2318     /* receive message of column indices */
2319     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2320     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2321     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2322   }
2323 
2324   /* loop over local rows, determining number of off diagonal entries */
2325   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2326   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2327   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2328   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2329   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2330   rowcount = 0;
2331   nzcount  = 0;
2332   for (i=0; i<mbs; i++) {
2333     dcount  = 0;
2334     odcount = 0;
2335     for (j=0; j<bs; j++) {
2336       kmax = locrowlens[rowcount];
2337       for (k=0; k<kmax; k++) {
2338         tmp = mycols[nzcount++]/bs; /* block col. index */
2339         if (!mask[tmp]) {
2340           mask[tmp] = 1;
2341           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2342           else masked1[dcount++] = tmp; /* entry in diag portion */
2343         }
2344       }
2345       rowcount++;
2346     }
2347 
2348     dlens[i]  = dcount;  /* d_nzz[i] */
2349     odlens[i] = odcount; /* o_nzz[i] */
2350 
2351     /* zero out the mask elements we set */
2352     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2353     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2354   }
2355   if (!sizesset) {
2356     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2357   }
2358   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2359   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2360 
2361   if (!rank) {
2362     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2363     /* read in my part of the matrix numerical values  */
2364     nz     = procsnz[0];
2365     vals   = buf;
2366     mycols = ibuf;
2367     if (size == 1) nz -= extra_rows;
2368     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2369     if (size == 1) {
2370       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2371     }
2372 
2373     /* insert into matrix */
2374     jj = rstart*bs;
2375     for (i=0; i<m; i++) {
2376       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2377       mycols += locrowlens[i];
2378       vals   += locrowlens[i];
2379       jj++;
2380     }
2381 
2382     /* read in other processors (except the last one) and ship out */
2383     for (i=1; i<size-1; i++) {
2384       nz   = procsnz[i];
2385       vals = buf;
2386       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2387       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2388     }
2389     /* the last proc */
2390     if (size != 1) {
2391       nz   = procsnz[size-1] - extra_rows; /* the extra rows are not on the disk */
2392       vals = buf;
2393       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2394       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2395       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2396     }
2397     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2398 
2399   } else {
2400     /* receive numeric values */
2401     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2402 
2403     /* receive message of values */
2404     vals   = buf;
2405     mycols = ibuf;
2406     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2407     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2408     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2409 
2410     /* insert into matrix */
2411     jj = rstart*bs;
2412     for (i=0; i<m; i++) {
2413       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2414       mycols += locrowlens[i];
2415       vals   += locrowlens[i];
2416       jj++;
2417     }
2418   }
2419 
2420   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2421   ierr = PetscFree(buf);CHKERRQ(ierr);
2422   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2423   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2424   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2425   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2426   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2427   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2428   PetscFunctionReturn(0);
2429 }
2430 
2431 #undef __FUNCT__
2432 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2433 /*XXXXX@
2434    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2435 
2436    Input Parameters:
2437 .  mat  - the matrix
2438 .  fact - factor
2439 
2440    Not Collective on Mat, each process can have a different hash factor
2441 
2442    Level: advanced
2443 
2444   Notes:
2445    This can also be set by the command line option: -mat_use_hash_table fact
2446 
2447 .keywords: matrix, hashtable, factor, HT
2448 
2449 .seealso: MatSetOption()
2450 @XXXXX*/
2451 
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2455 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2456 {
2457   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2458   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2459   PetscReal      atmp;
2460   PetscReal      *work,*svalues,*rvalues;
2461   PetscErrorCode ierr;
2462   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2463   PetscMPIInt    rank,size;
2464   PetscInt       *rowners_bs,dest,count,source;
2465   PetscScalar    *va;
2466   MatScalar      *ba;
2467   MPI_Status     stat;
2468 
2469   PetscFunctionBegin;
2470   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2471   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2472   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2473 
2474   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2475   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2476 
2477   bs  = A->rmap->bs;
2478   mbs = a->mbs;
2479   Mbs = a->Mbs;
2480   ba  = b->a;
2481   bi  = b->i;
2482   bj  = b->j;
2483 
2484   /* find ownerships */
2485   rowners_bs = A->rmap->range;
2486 
2487   /* each proc creates an array to be distributed */
2488   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2489   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2490 
2491   /* row_max for B */
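  /* Only the upper triangle is stored, so each entry of B contributes both to a locally owned
     row and to a column owned by a higher-rank process; accumulate the column (transpose)
     contributions in work[] and send each slice to its owning process below */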
2492   if (rank != size-1) {
2493     for (i=0; i<mbs; i++) {
2494       ncols = bi[1] - bi[0]; bi++;
2495       brow  = bs*i;
2496       for (j=0; j<ncols; j++) {
2497         bcol = bs*(*bj);
2498         for (kcol=0; kcol<bs; kcol++) {
2499           col  = bcol + kcol;                /* local col index */
2500           col += rowners_bs[rank+1];      /* global col index */
2501           for (krow=0; krow<bs; krow++) {
2502             atmp = PetscAbsScalar(*ba); ba++;
2503             row  = brow + krow;   /* local row index */
2504             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2505             if (work[col] < atmp) work[col] = atmp;
2506           }
2507         }
2508         bj++;
2509       }
2510     }
2511 
2512     /* send values to its owners */
2513     for (dest=rank+1; dest<size; dest++) {
2514       svalues = work + rowners_bs[dest];
2515       count   = rowners_bs[dest+1]-rowners_bs[dest];
2516       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2517     }
2518   }
2519 
2520   /* receive values */
2521   if (rank) {
2522     rvalues = work;
2523     count   = rowners_bs[rank+1]-rowners_bs[rank];
2524     for (source=0; source<rank; source++) {
2525       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2526       /* process values */
2527       for (i=0; i<count; i++) {
2528         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2529       }
2530     }
2531   }
2532 
2533   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2534   ierr = PetscFree(work);CHKERRQ(ierr);
2535   PetscFunctionReturn(0);
2536 }
2537 
2538 #undef __FUNCT__
2539 #define __FUNCT__ "MatSOR_MPISBAIJ"
2540 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2541 {
2542   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2543   PetscErrorCode    ierr;
2544   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2545   PetscScalar       *x,*ptr,*from;
2546   Vec               bb1;
2547   const PetscScalar *b;
2548 
2549   PetscFunctionBegin;
2550   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2551   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2552 
2553   if (flag == SOR_APPLY_UPPER) {
2554     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2555     PetscFunctionReturn(0);
2556   }
2557 
2558   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2559     if (flag & SOR_ZERO_INITIAL_GUESS) {
2560       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2561       its--;
2562     }
2563 
2564     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
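    /* Each pass assembles bb1 = bb - (off-diagonal part of A)*xx: the lower-triangle
       contribution B^T*xx travels through the sMvctx scatter while the upper-triangle
       contribution B*xx is applied locally, then the diagonal block is relaxed symmetrically */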
2565     while (its--) {
2566 
2567       /* lower triangular part: slvec0b = - B^T*xx */
2568       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2569 
2570       /* copy xx into slvec0a */
2571       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2572       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2573       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2574       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2575 
2576       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2577 
2578       /* copy bb into slvec1a */
2579       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2580       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2581       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2582       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2583 
2584       /* set slvec1b = 0 */
2585       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2586 
2587       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2588       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2589       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2590       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2591 
2592       /* upper triangular part: bb1 = slvec1a + B*slvec1b = bb - (B^T*x received) - B*x_ghost */
2593       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2594 
2595       /* local diagonal sweep */
2596       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2597     }
2598     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2599   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2600     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2601   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2603   } else if (flag & SOR_EISENSTAT) {
2604     Vec               xx1;
2605     PetscBool         hasop;
2606     const PetscScalar *diag;
2607     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2608     PetscInt          i,n;
2609 
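    /*
       Eisenstat trick: one local backward sweep produces xx, the modified
       right-hand side slvec1a = bb + ((omega-2)/omega)*D*xx is assembled
       (plus the off-process coupling, gathered as in the symmetric sweep
       above), and one local forward sweep yields the correction xx1 that is
       added to xx, so each application needs only one communication round.
    */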
2610     if (!mat->xx1) {
2611       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2612       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2613     }
2614     xx1 = mat->xx1;
2615     bb1 = mat->bb1;
2616 
2617     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2618 
2619     if (!mat->diag) {
2620       /* this is wrong for same matrix with new nonzero values */
2621       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2622       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2623     }
2624     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2625 
2626     if (hasop) {
2627       ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr); /* slvec1a = D*xx; was written to bb1, leaving slvec1a stale */
2628       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);              /* slvec1a = bb + scale*D*xx, matching the explicit loop below */
2629     } else {
2630       /*
2631           The following two calls are replaced by the explicit loop below, which may be a bit faster with a good compiler:
2632       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2633       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2634       */
2635       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2636       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2637       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2638       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2639       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2640       if (omega == 1.0) {
2641         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2642         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2643       } else {
2644         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2645         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2646       }
2647       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2648       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2649       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2650       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2651     }
2652 
2653     /* multiply off-diagonal portion of matrix */
2654     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2655     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2656     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2657     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2658     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2659     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2660     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2661     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2662     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2663     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2664 
2665     /* local sweep */
2666     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2667     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2668   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2669   PetscFunctionReturn(0);
2670 }
2671 
2672 #undef __FUNCT__
2673 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2674 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2675 {
2676   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2677   PetscErrorCode ierr;
2678   Vec            lvec1,bb1;
2679 
2680   PetscFunctionBegin;
2681   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2682   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2683 
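  /*
     Two-communication variant: instead of the combined slvec0/slvec1 scatter
     of MatSOR_MPISBAIJ, each iteration gathers the ghost values of xx forward
     into lvec and pushes -B^T*xx backward into bb1 with a reverse scatter,
     yielding the same bb1 = bb - (B^T*x received) - B*x_ghost.
  */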
2684   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2685     if (flag & SOR_ZERO_INITIAL_GUESS) {
2686       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2687       its--;
2688     }
2689 
2690     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2691     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2692     while (its--) {
2693       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2694 
2695       /* lower triangular part: bb1 = bb - B^T*xx */
2696       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2697       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2698 
2699       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2700       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2701       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2702 
2703       /* upper triangular part: bb1 = bb1 - B*x_ghost */
2704       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2705       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2706 
2707       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2708 
2709       /* diagonal sweep */
2710       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2711     }
2712     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2713     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2714   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2715   PetscFunctionReturn(0);
2716 }
2717 
2718 #undef __FUNCT__
2719 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2720 /*@
2721      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local
2722          rows in standard CSR format.
2723 
2724    Collective on MPI_Comm
2725 
2726    Input Parameters:
2727 +  comm - MPI communicator
2728 .  bs - the block size, only a block size of 1 is supported
2729 .  m - number of local rows (Cannot be PETSC_DECIDE)
2730 .  n - number of local columns; this should be the same as the local size used in creating the
2731        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it
2732        calculated if N is given). For square matrices n is almost always m.
2733 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2734 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2735 .   i - row indices
2736 .   j - column indices
2737 -   a - matrix values
2738 
2739    Output Parameter:
2740 .   mat - the matrix
2741 
2742    Level: intermediate
2743 
2744    Notes:
2745        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2746      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2747      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2748 
2749        The i and j indices are 0 based; each entry of i is the offset into the local j array at which that row's column indices begin.
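
   Example:
     A minimal sketch for bs=1 on one process, assuming (as is usual for SBAIJ
     storage) that only the upper triangular part of each row is supplied; the
     symmetric matrix

         1  2  0
         2  5  3
         0  3  6

     is created from its upper triangle with
.vb
     PetscInt    i[] = {0,2,4,5};        /* offsets into j for rows 0..2 */
     PetscInt    j[] = {0,1, 1,2, 2};    /* column indices, sorted per row */
     PetscScalar a[] = {1,2, 5,3, 6};    /* upper-triangular values */
     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,3,3,3,3,i,j,a,&mat);CHKERRQ(ierr);
.ve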
2750 
2751 .keywords: matrix, sbaij, compressed row, sparse, parallel
2752 
2753 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2754           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2755 @*/
2756 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2757 {
2758   PetscErrorCode ierr;
2759 
2761   PetscFunctionBegin;
2762   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2763   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2764   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2765   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2766   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2767   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2768   PetscFunctionReturn(0);
2769 }
2770 
2772 #undef __FUNCT__
2773 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2774 /*@C
2775    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2776    using compressed sparse row (CSR) input.
2777 
2778    Collective on MPI_Comm
2779 
2780    Input Parameters:
2781 +  B - the matrix
2782 .  bs - the block size
2783 .  i - the indices into j for the start of each local row (starts with zero)
2784 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2785 -  v - optional values in the matrix
2786 
2787    Level: developer
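
   Example:
     A minimal sketch (bs=1, one process), assuming only the upper triangle of
     each row is passed; this builds the 2x2 symmetric matrix [[4,1],[1,3]]:
.vb
     Mat         B;
     PetscInt    i[] = {0,2,3};     /* row 0 -> j[0..1], row 1 -> j[2] */
     PetscInt    j[] = {0,1, 1};    /* sorted column indices per row */
     PetscScalar v[] = {4,1, 3};    /* upper-triangular values */
     ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,2,2,2,2);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
.ve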
2788 
2789 .keywords: matrix, sbaij, compressed row, sparse, parallel
2790 
2791 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2792 @*/
2793 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2794 {
2795   PetscErrorCode ierr;
2796 
2797   PetscFunctionBegin;
2798   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2799   PetscFunctionReturn(0);
2800 }
2801 
2802 
2803