xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 461878b29a91ee2f8c912b31b68f4de725324203)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 #undef __FUNCT__
8 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
9 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
10 {
11   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
12   PetscErrorCode ierr;
13 
14   PetscFunctionBegin;
15   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
16   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
17   PetscFunctionReturn(0);
18 }
19 
20 #undef __FUNCT__
21 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
22 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
23 {
24   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
25   PetscErrorCode ierr;
26 
27   PetscFunctionBegin;
28   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
29   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
30   PetscFunctionReturn(0);
31 }
32 
33 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
34   { \
35  \
36     brow = row/bs;  \
37     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
38     rmax = aimax[brow]; nrow = ailen[brow]; \
39     bcol = col/bs; \
40     ridx = row % bs; cidx = col % bs; \
41     low  = 0; high = nrow; \
42     while (high-low > 3) { \
43       t = (low+high)/2; \
44       if (rp[t] > bcol) high = t; \
45       else              low  = t; \
46     } \
47     for (_i=low; _i<high; _i++) { \
48       if (rp[_i] > bcol) break; \
49       if (rp[_i] == bcol) { \
50         bap = ap + bs2*_i + bs*cidx + ridx; \
51         if (addv == ADD_VALUES) *bap += value;  \
52         else                    *bap  = value;  \
53         goto a_noinsert; \
54       } \
55     } \
56     if (a->nonew == 1) goto a_noinsert; \
57     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
58     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
59     N = nrow++ - 1;  \
60     /* shift up all the later entries in this row */ \
61     for (ii=N; ii>=_i; ii--) { \
62       rp[ii+1] = rp[ii]; \
63       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
64     } \
65     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
66     rp[_i]                      = bcol;  \
67     ap[bs2*_i + bs*cidx + ridx] = value;  \
68     A->nonzerostate++;\
69 a_noinsert:; \
70     ailen[brow] = nrow; \
71   }
72 
73 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
74   { \
75     brow = row/bs;  \
76     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
77     rmax = bimax[brow]; nrow = bilen[brow]; \
78     bcol = col/bs; \
79     ridx = row % bs; cidx = col % bs; \
80     low  = 0; high = nrow; \
81     while (high-low > 3) { \
82       t = (low+high)/2; \
83       if (rp[t] > bcol) high = t; \
84       else              low  = t; \
85     } \
86     for (_i=low; _i<high; _i++) { \
87       if (rp[_i] > bcol) break; \
88       if (rp[_i] == bcol) { \
89         bap = ap + bs2*_i + bs*cidx + ridx; \
90         if (addv == ADD_VALUES) *bap += value;  \
91         else                    *bap  = value;  \
92         goto b_noinsert; \
93       } \
94     } \
95     if (b->nonew == 1) goto b_noinsert; \
96     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
97     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
98     N = nrow++ - 1;  \
99     /* shift up all the later entries in this row */ \
100     for (ii=N; ii>=_i; ii--) { \
101       rp[ii+1] = rp[ii]; \
102       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
103     } \
104     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
105     rp[_i]                      = bcol;  \
106     ap[bs2*_i + bs*cidx + ridx] = value;  \
107     B->nonzerostate++;\
108 b_noinsert:; \
109     bilen[brow] = nrow; \
110   }
111 
112 /* Only add/insert a(i,j) with i<=j (blocks).
113    Any a(i,j) with i>j input by user is ingored.
114 */
115 #undef __FUNCT__
116 #define __FUNCT__ "MatSetValues_MPISBAIJ"
117 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
118 {
119   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
120   MatScalar      value;
121   PetscBool      roworiented = baij->roworiented;
122   PetscErrorCode ierr;
123   PetscInt       i,j,row,col;
124   PetscInt       rstart_orig=mat->rmap->rstart;
125   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
126   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
127 
128   /* Some Variables required in the macro */
129   Mat          A     = baij->A;
130   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
131   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
132   MatScalar    *aa   =a->a;
133 
134   Mat         B     = baij->B;
135   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
136   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
137   MatScalar   *ba   =b->a;
138 
139   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
140   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
141   MatScalar *ap,*bap;
142 
143   /* for stash */
144   PetscInt  n_loc, *in_loc = NULL;
145   MatScalar *v_loc = NULL;
146 
147   PetscFunctionBegin;
148   if (!baij->donotstash) {
149     if (n > baij->n_loc) {
150       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
151       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
152       ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
153       ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);
154 
155       baij->n_loc = n;
156     }
157     in_loc = baij->in_loc;
158     v_loc  = baij->v_loc;
159   }
160 
161   for (i=0; i<m; i++) {
162     if (im[i] < 0) continue;
163 #if defined(PETSC_USE_DEBUG)
164     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
165 #endif
166     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
167       row = im[i] - rstart_orig;              /* local row index */
168       for (j=0; j<n; j++) {
169         if (im[i]/bs > in[j]/bs) {
170           if (a->ignore_ltriangular) {
171             continue;    /* ignore lower triangular blocks */
172           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
173         }
174         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
175           col  = in[j] - cstart_orig;         /* local col index */
176           brow = row/bs; bcol = col/bs;
177           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
178           if (roworiented) value = v[i*n+j];
179           else             value = v[i+j*m];
180           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
181           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
182         } else if (in[j] < 0) continue;
183 #if defined(PETSC_USE_DEBUG)
184         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
185 #endif
186         else {  /* off-diag entry (B) */
187           if (mat->was_assembled) {
188             if (!baij->colmap) {
189               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
190             }
191 #if defined(PETSC_USE_CTABLE)
192             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
193             col  = col - 1;
194 #else
195             col = baij->colmap[in[j]/bs] - 1;
196 #endif
197             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
198               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
199               col  =  in[j];
200               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
201               B    = baij->B;
202               b    = (Mat_SeqBAIJ*)(B)->data;
203               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
204               ba   = b->a;
205             } else col += in[j]%bs;
206           } else col = in[j];
207           if (roworiented) value = v[i*n+j];
208           else             value = v[i+j*m];
209           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
210           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
211         }
212       }
213     } else {  /* off processor entry */
214       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
215       if (!baij->donotstash) {
216         mat->assembled = PETSC_FALSE;
217         n_loc          = 0;
218         for (j=0; j<n; j++) {
219           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
220           in_loc[n_loc] = in[j];
221           if (roworiented) {
222             v_loc[n_loc] = v[i*n+j];
223           } else {
224             v_loc[n_loc] = v[j*m+i];
225           }
226           n_loc++;
227         }
228         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
229       }
230     }
231   }
232   PetscFunctionReturn(0);
233 }
234 
235 #undef __FUNCT__
236 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
237 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
238 {
239   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
240   const MatScalar *value;
241   MatScalar       *barray     =baij->barray;
242   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
243   PetscErrorCode  ierr;
244   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
245   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
246   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
247 
248   PetscFunctionBegin;
249   if (!barray) {
250     ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
251     baij->barray = barray;
252   }
253 
254   if (roworiented) {
255     stepval = (n-1)*bs;
256   } else {
257     stepval = (m-1)*bs;
258   }
259   for (i=0; i<m; i++) {
260     if (im[i] < 0) continue;
261 #if defined(PETSC_USE_DEBUG)
262     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
263 #endif
264     if (im[i] >= rstart && im[i] < rend) {
265       row = im[i] - rstart;
266       for (j=0; j<n; j++) {
267         if (im[i] > in[j]) {
268           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
269           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
270         }
271         /* If NumCol = 1 then a copy is not required */
272         if ((roworiented) && (n == 1)) {
273           barray = (MatScalar*) v + i*bs2;
274         } else if ((!roworiented) && (m == 1)) {
275           barray = (MatScalar*) v + j*bs2;
276         } else { /* Here a copy is required */
277           if (roworiented) {
278             value = v + i*(stepval+bs)*bs + j*bs;
279           } else {
280             value = v + j*(stepval+bs)*bs + i*bs;
281           }
282           for (ii=0; ii<bs; ii++,value+=stepval) {
283             for (jj=0; jj<bs; jj++) {
284               *barray++ = *value++;
285             }
286           }
287           barray -=bs2;
288         }
289 
290         if (in[j] >= cstart && in[j] < cend) {
291           col  = in[j] - cstart;
292           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
293         } else if (in[j] < 0) continue;
294 #if defined(PETSC_USE_DEBUG)
295         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
296 #endif
297         else {
298           if (mat->was_assembled) {
299             if (!baij->colmap) {
300               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
301             }
302 
303 #if defined(PETSC_USE_DEBUG)
304 #if defined(PETSC_USE_CTABLE)
305             { PetscInt data;
306               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
307               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
308             }
309 #else
310             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
311 #endif
312 #endif
313 #if defined(PETSC_USE_CTABLE)
314             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
315             col  = (col - 1)/bs;
316 #else
317             col = (baij->colmap[in[j]] - 1)/bs;
318 #endif
319             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
320               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
321               col  = in[j];
322             }
323           } else col = in[j];
324           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
325         }
326       }
327     } else {
328       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
329       if (!baij->donotstash) {
330         if (roworiented) {
331           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
332         } else {
333           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
334         }
335       }
336     }
337   }
338   PetscFunctionReturn(0);
339 }
340 
341 #undef __FUNCT__
342 #define __FUNCT__ "MatGetValues_MPISBAIJ"
343 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
344 {
345   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
346   PetscErrorCode ierr;
347   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
348   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
349 
350   PetscFunctionBegin;
351   for (i=0; i<m; i++) {
352     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
353     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
354     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
355       row = idxm[i] - bsrstart;
356       for (j=0; j<n; j++) {
357         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
358         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
359         if (idxn[j] >= bscstart && idxn[j] < bscend) {
360           col  = idxn[j] - bscstart;
361           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
362         } else {
363           if (!baij->colmap) {
364             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
365           }
366 #if defined(PETSC_USE_CTABLE)
367           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
368           data--;
369 #else
370           data = baij->colmap[idxn[j]/bs]-1;
371 #endif
372           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
373           else {
374             col  = data + idxn[j]%bs;
375             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
376           }
377         }
378       }
379     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
380   }
381   PetscFunctionReturn(0);
382 }
383 
384 #undef __FUNCT__
385 #define __FUNCT__ "MatNorm_MPISBAIJ"
386 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
387 {
388   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
389   PetscErrorCode ierr;
390   PetscReal      sum[2],*lnorm2;
391 
392   PetscFunctionBegin;
393   if (baij->size == 1) {
394     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
395   } else {
396     if (type == NORM_FROBENIUS) {
397       ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
398       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
399       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
400       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
401       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
402       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
403       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
404       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
405     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
406       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
407       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
408       PetscReal    *rsum,*rsum2,vabs;
409       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
410       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
411       MatScalar    *v;
412 
413       ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
414       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
415       /* Amat */
416       v = amat->a; jj = amat->j;
417       for (brow=0; brow<mbs; brow++) {
418         grow = bs*(rstart + brow);
419         nz   = amat->i[brow+1] - amat->i[brow];
420         for (bcol=0; bcol<nz; bcol++) {
421           gcol = bs*(rstart + *jj); jj++;
422           for (col=0; col<bs; col++) {
423             for (row=0; row<bs; row++) {
424               vabs            = PetscAbsScalar(*v); v++;
425               rsum[gcol+col] += vabs;
426               /* non-diagonal block */
427               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
428             }
429           }
430         }
431       }
432       /* Bmat */
433       v = bmat->a; jj = bmat->j;
434       for (brow=0; brow<mbs; brow++) {
435         grow = bs*(rstart + brow);
436         nz = bmat->i[brow+1] - bmat->i[brow];
437         for (bcol=0; bcol<nz; bcol++) {
438           gcol = bs*garray[*jj]; jj++;
439           for (col=0; col<bs; col++) {
440             for (row=0; row<bs; row++) {
441               vabs            = PetscAbsScalar(*v); v++;
442               rsum[gcol+col] += vabs;
443               rsum[grow+row] += vabs;
444             }
445           }
446         }
447       }
448       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
449       *norm = 0.0;
450       for (col=0; col<mat->cmap->N; col++) {
451         if (rsum2[col] > *norm) *norm = rsum2[col];
452       }
453       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
454     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
455   }
456   PetscFunctionReturn(0);
457 }
458 
459 #undef __FUNCT__
460 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
461 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
462 {
463   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
464   PetscErrorCode ierr;
465   PetscInt       nstash,reallocs;
466   InsertMode     addv;
467 
468   PetscFunctionBegin;
469   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
470 
471   /* make sure all processors are either in INSERTMODE or ADDMODE */
472   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
473   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
474   mat->insertmode = addv; /* in case this processor had no cache */
475 
476   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
477   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
478   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
479   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
480   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
481   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
482   PetscFunctionReturn(0);
483 }
484 
485 #undef __FUNCT__
486 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
487 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
488 {
489   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
490   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
491   PetscErrorCode ierr;
492   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
493   PetscInt       *row,*col;
494   PetscBool      other_disassembled;
495   PetscMPIInt    n;
496   PetscBool      r1,r2,r3;
497   MatScalar      *val;
498   InsertMode     addv = mat->insertmode;
499 
500   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
501   PetscFunctionBegin;
502   if (!baij->donotstash &&  !mat->nooffprocentries) {
503     while (1) {
504       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
505       if (!flg) break;
506 
507       for (i=0; i<n;) {
508         /* Now identify the consecutive vals belonging to the same row */
509         for (j=i,rstart=row[j]; j<n; j++) {
510           if (row[j] != rstart) break;
511         }
512         if (j < n) ncols = j-i;
513         else       ncols = n-i;
514         /* Now assemble all these values with a single function call */
515         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
516         i    = j;
517       }
518     }
519     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
520     /* Now process the block-stash. Since the values are stashed column-oriented,
521        set the roworiented flag to column oriented, and after MatSetValues()
522        restore the original flags */
523     r1 = baij->roworiented;
524     r2 = a->roworiented;
525     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
526 
527     baij->roworiented = PETSC_FALSE;
528     a->roworiented    = PETSC_FALSE;
529 
530     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
531     while (1) {
532       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
533       if (!flg) break;
534 
535       for (i=0; i<n;) {
536         /* Now identify the consecutive vals belonging to the same row */
537         for (j=i,rstart=row[j]; j<n; j++) {
538           if (row[j] != rstart) break;
539         }
540         if (j < n) ncols = j-i;
541         else       ncols = n-i;
542         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
543         i    = j;
544       }
545     }
546     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
547 
548     baij->roworiented = r1;
549     a->roworiented    = r2;
550 
551     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
552   }
553 
554   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
555   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
556 
557   /* determine if any processor has disassembled, if so we must
558      also disassemble ourselfs, in order that we may reassemble. */
559   /*
560      if nonzero structure of submatrix B cannot change then we know that
561      no processor disassembled thus we can skip this stuff
562   */
563   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
564     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
565     if (mat->was_assembled && !other_disassembled) {
566       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
567     }
568   }
569 
570   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
571     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
572   }
573   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
575 
576   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
577 
578   baij->rowvalues = 0;
579 
580   /* if no new nonzero locations are allowed in matrix then only set the matrix state the first time through */
581   if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
582     PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
583     ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
584   }
585   PetscFunctionReturn(0);
586 }
587 
588 extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
589 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
590 #include <petscdraw.h>
591 #undef __FUNCT__
592 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
593 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
594 {
595   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
596   PetscErrorCode    ierr;
597   PetscInt          bs   = mat->rmap->bs;
598   PetscMPIInt       rank = baij->rank;
599   PetscBool         iascii,isdraw;
600   PetscViewer       sviewer;
601   PetscViewerFormat format;
602 
603   PetscFunctionBegin;
604   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
605   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
606   if (iascii) {
607     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
608     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
609       MatInfo info;
610       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
611       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
612       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
613       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
614       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
615       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
616       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
617       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
618       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
619       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
620       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
621       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
622       PetscFunctionReturn(0);
623     } else if (format == PETSC_VIEWER_ASCII_INFO) {
624       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
625       PetscFunctionReturn(0);
626     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
627       PetscFunctionReturn(0);
628     }
629   }
630 
631   if (isdraw) {
632     PetscDraw draw;
633     PetscBool isnull;
634     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
635     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
636   }
637 
638   {
639     /* assemble the entire matrix onto first processor. */
640     Mat          A;
641     Mat_SeqSBAIJ *Aloc;
642     Mat_SeqBAIJ  *Bloc;
643     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
644     MatScalar    *a;
645     const char   *matname;
646 
647     /* Should this be the same type as mat? */
648     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
649     if (!rank) {
650       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
651     } else {
652       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
653     }
654     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
655     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
656     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
657     ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);
658 
659     /* copy over the A part */
660     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
661     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
662     ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);
663 
664     for (i=0; i<mbs; i++) {
665       rvals[0] = bs*(baij->rstartbs + i);
666       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
667       for (j=ai[i]; j<ai[i+1]; j++) {
668         col = (baij->cstartbs+aj[j])*bs;
669         for (k=0; k<bs; k++) {
670           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
671           col++;
672           a += bs;
673         }
674       }
675     }
676     /* copy over the B part */
677     Bloc = (Mat_SeqBAIJ*)baij->B->data;
678     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
679     for (i=0; i<mbs; i++) {
680 
681       rvals[0] = bs*(baij->rstartbs + i);
682       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
683       for (j=ai[i]; j<ai[i+1]; j++) {
684         col = baij->garray[aj[j]]*bs;
685         for (k=0; k<bs; k++) {
686           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
687           col++;
688           a += bs;
689         }
690       }
691     }
692     ierr = PetscFree(rvals);CHKERRQ(ierr);
693     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
694     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
695     /*
696        Everyone has to call to draw the matrix since the graphics waits are
697        synchronized across all processors that share the PetscDraw object
698     */
699     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
700     ierr = PetscObjectGetName((PetscObject)mat,&matname);CHKERRQ(ierr);
701     if (!rank) {
702       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,matname);CHKERRQ(ierr);
703       ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
704     }
705     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
706     ierr = MatDestroy(&A);CHKERRQ(ierr);
707   }
708   PetscFunctionReturn(0);
709 }
710 
711 #undef __FUNCT__
712 #define __FUNCT__ "MatView_MPISBAIJ"
713 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
714 {
715   PetscErrorCode ierr;
716   PetscBool      iascii,isdraw,issocket,isbinary;
717 
718   PetscFunctionBegin;
719   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
720   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
721   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
722   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
723   if (iascii || isdraw || issocket || isbinary) {
724     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
725   }
726   PetscFunctionReturn(0);
727 }
728 
729 #undef __FUNCT__
730 #define __FUNCT__ "MatDestroy_MPISBAIJ"
731 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
732 {
733   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
734   PetscErrorCode ierr;
735 
736   PetscFunctionBegin;
737 #if defined(PETSC_USE_LOG)
738   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
739 #endif
740   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
741   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
742   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
743   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
744 #if defined(PETSC_USE_CTABLE)
745   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
746 #else
747   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
748 #endif
749   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
750   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
751   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
752   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
753   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
754   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
755   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
756   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
757   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
758   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
759   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
760   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
761   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
762   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
763   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
764 #if defined(PETSC_USE_REAL_MAT_SINGLE)
765   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
766 #endif
767   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
768   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
769   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
770   ierr = PetscFree(mat->data);CHKERRQ(ierr);
771 
772   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
773   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
774   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
775   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
776   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
777   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
778   PetscFunctionReturn(0);
779 }
780 
781 #undef __FUNCT__
782 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
783 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
784 {
785   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
786   PetscErrorCode ierr;
787   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
788   PetscScalar    *x,*from;
789 
790   PetscFunctionBegin;
791   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
792   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
793 
794   /* diagonal part */
795   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
796   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
797 
798   /* subdiagonal part */
799   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
800 
801   /* copy x into the vec slvec0 */
802   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
803   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
804 
805   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
806   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
807   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
808 
809   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
810   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
811   /* supperdiagonal part */
812   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
813   PetscFunctionReturn(0);
814 }
815 
816 #undef __FUNCT__
817 #define __FUNCT__ "MatMult_MPISBAIJ"
818 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
819 {
820   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
821   PetscErrorCode ierr;
822   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
823   PetscScalar    *x,*from;
824 
825   PetscFunctionBegin;
826   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
827   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
828 
829   /* diagonal part */
830   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
831   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
832 
833   /* subdiagonal part */
834   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
835 
836   /* copy x into the vec slvec0 */
837   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
838   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
839 
840   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
841   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
842   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
843 
844   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
845   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
846   /* supperdiagonal part */
847   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
853 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
854 {
855   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
856   PetscErrorCode ierr;
857   PetscInt       nt;
858 
859   PetscFunctionBegin;
860   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
861   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
862 
863   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
864   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
865 
866   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
867   /* do diagonal part */
868   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
869   /* do supperdiagonal part */
870   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
871   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
872   /* do subdiagonal part */
873   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
874   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
875   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
876   PetscFunctionReturn(0);
877 }
878 
879 #undef __FUNCT__
880 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
881 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
882 {
883   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
884   PetscErrorCode ierr;
885   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
886   PetscScalar    *x,*from,zero=0.0;
887 
888   PetscFunctionBegin;
889   /*
890   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
891   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
892   */
893   /* diagonal part */
894   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
895   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
896 
897   /* subdiagonal part */
898   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
899 
900   /* copy x into the vec slvec0 */
901   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
902   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
903   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
904   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
905 
906   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
908   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
909 
910   /* supperdiagonal part */
911   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
917 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
918 {
919   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
920   PetscErrorCode ierr;
921 
922   PetscFunctionBegin;
923   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   /* do diagonal part */
925   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
926   /* do supperdiagonal part */
927   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
929 
930   /* do subdiagonal part */
931   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
932   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
933   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
934   PetscFunctionReturn(0);
935 }
936 
937 /*
938   This only works correctly for square matrices where the subblock A->A is the
939    diagonal block
940 */
941 #undef __FUNCT__
942 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
943 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
944 {
945   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
946   PetscErrorCode ierr;
947 
948   PetscFunctionBegin;
949   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
950   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
951   PetscFunctionReturn(0);
952 }
953 
954 #undef __FUNCT__
955 #define __FUNCT__ "MatScale_MPISBAIJ"
956 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
957 {
958   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
959   PetscErrorCode ierr;
960 
961   PetscFunctionBegin;
962   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
963   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
964   PetscFunctionReturn(0);
965 }
966 
967 #undef __FUNCT__
968 #define __FUNCT__ "MatGetRow_MPISBAIJ"
969 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
970 {
971   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
972   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
973   PetscErrorCode ierr;
974   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
975   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
976   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
977 
978   PetscFunctionBegin;
979   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
980   mat->getrowactive = PETSC_TRUE;
981 
982   if (!mat->rowvalues && (idx || v)) {
983     /*
984         allocate enough space to hold information from the longest row.
985     */
986     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
987     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
988     PetscInt     max = 1,mbs = mat->mbs,tmp;
989     for (i=0; i<mbs; i++) {
990       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
991       if (max < tmp) max = tmp;
992     }
993     ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
994   }
995 
996   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
997   lrow = row - brstart;  /* local row index */
998 
999   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1000   if (!v)   {pvA = 0; pvB = 0;}
1001   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1002   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1003   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1004   nztot = nzA + nzB;
1005 
1006   cmap = mat->garray;
1007   if (v  || idx) {
1008     if (nztot) {
1009       /* Sort by increasing column numbers, assuming A and B already sorted */
1010       PetscInt imark = -1;
1011       if (v) {
1012         *v = v_p = mat->rowvalues;
1013         for (i=0; i<nzB; i++) {
1014           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1015           else break;
1016         }
1017         imark = i;
1018         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1019         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1020       }
1021       if (idx) {
1022         *idx = idx_p = mat->rowindices;
1023         if (imark > -1) {
1024           for (i=0; i<imark; i++) {
1025             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1026           }
1027         } else {
1028           for (i=0; i<nzB; i++) {
1029             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1030             else break;
1031           }
1032           imark = i;
1033         }
1034         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1035         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1036       }
1037     } else {
1038       if (idx) *idx = 0;
1039       if (v)   *v   = 0;
1040     }
1041   }
1042   *nz  = nztot;
1043   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1044   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1050 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1051 {
1052   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1053 
1054   PetscFunctionBegin;
1055   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1056   baij->getrowactive = PETSC_FALSE;
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1062 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1063 {
1064   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1065   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1066 
1067   PetscFunctionBegin;
1068   aA->getrow_utriangular = PETSC_TRUE;
1069   PetscFunctionReturn(0);
1070 }
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1073 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1074 {
1075   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1076   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1077 
1078   PetscFunctionBegin;
1079   aA->getrow_utriangular = PETSC_FALSE;
1080   PetscFunctionReturn(0);
1081 }
1082 
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1085 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1086 {
1087   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1092   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1098 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1099 {
1100   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1105   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1111 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1112 {
1113   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1114   PetscErrorCode ierr;
1115 
1116   PetscFunctionBegin;
1117   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1118   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1124 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1125 {
1126   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1127   Mat            A  = a->A,B = a->B;
1128   PetscErrorCode ierr;
1129   PetscReal      isend[5],irecv[5];
1130 
1131   PetscFunctionBegin;
1132   info->block_size = (PetscReal)matin->rmap->bs;
1133 
1134   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1135 
1136   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1137   isend[3] = info->memory;  isend[4] = info->mallocs;
1138 
1139   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1140 
1141   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1142   isend[3] += info->memory;  isend[4] += info->mallocs;
1143   if (flag == MAT_LOCAL) {
1144     info->nz_used      = isend[0];
1145     info->nz_allocated = isend[1];
1146     info->nz_unneeded  = isend[2];
1147     info->memory       = isend[3];
1148     info->mallocs      = isend[4];
1149   } else if (flag == MAT_GLOBAL_MAX) {
1150     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1151 
1152     info->nz_used      = irecv[0];
1153     info->nz_allocated = irecv[1];
1154     info->nz_unneeded  = irecv[2];
1155     info->memory       = irecv[3];
1156     info->mallocs      = irecv[4];
1157   } else if (flag == MAT_GLOBAL_SUM) {
1158     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1159 
1160     info->nz_used      = irecv[0];
1161     info->nz_allocated = irecv[1];
1162     info->nz_unneeded  = irecv[2];
1163     info->memory       = irecv[3];
1164     info->mallocs      = irecv[4];
1165   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1166   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1167   info->fill_ratio_needed = 0;
1168   info->factor_mallocs    = 0;
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1174 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1175 {
1176   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1177   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1178   PetscErrorCode ierr;
1179 
1180   PetscFunctionBegin;
1181   switch (op) {
1182   case MAT_NEW_NONZERO_LOCATIONS:
1183   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1184   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1185   case MAT_KEEP_NONZERO_PATTERN:
1186   case MAT_NEW_NONZERO_LOCATION_ERR:
1187     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1188     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1189     break;
1190   case MAT_ROW_ORIENTED:
1191     a->roworiented = flg;
1192 
1193     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1194     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1195     break;
1196   case MAT_NEW_DIAGONALS:
1197     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1198     break;
1199   case MAT_IGNORE_OFF_PROC_ENTRIES:
1200     a->donotstash = flg;
1201     break;
1202   case MAT_USE_HASH_TABLE:
1203     a->ht_flag = flg;
1204     break;
1205   case MAT_HERMITIAN:
1206     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1207     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1208 
1209     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1210     break;
1211   case MAT_SPD:
1212     A->spd_set = PETSC_TRUE;
1213     A->spd     = flg;
1214     if (flg) {
1215       A->symmetric                  = PETSC_TRUE;
1216       A->structurally_symmetric     = PETSC_TRUE;
1217       A->symmetric_set              = PETSC_TRUE;
1218       A->structurally_symmetric_set = PETSC_TRUE;
1219     }
1220     break;
1221   case MAT_SYMMETRIC:
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     break;
1224   case MAT_STRUCTURALLY_SYMMETRIC:
1225     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1226     break;
1227   case MAT_SYMMETRY_ETERNAL:
1228     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1229     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1230     break;
1231   case MAT_IGNORE_LOWER_TRIANGULAR:
1232     aA->ignore_ltriangular = flg;
1233     break;
1234   case MAT_ERROR_LOWER_TRIANGULAR:
1235     aA->ignore_ltriangular = flg;
1236     break;
1237   case MAT_GETROW_UPPERTRIANGULAR:
1238     aA->getrow_utriangular = flg;
1239     break;
1240   default:
1241     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1242   }
1243   PetscFunctionReturn(0);
1244 }
1245 
1246 #undef __FUNCT__
1247 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1248 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1249 {
1250   PetscErrorCode ierr;
1251 
1252   PetscFunctionBegin;
1253   if (MAT_INITIAL_MATRIX || *B != A) {
1254     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1255   }
1256   PetscFunctionReturn(0);
1257 }
1258 
1259 #undef __FUNCT__
1260 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1261 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1262 {
1263   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1264   Mat            a     = baij->A, b=baij->B;
1265   PetscErrorCode ierr;
1266   PetscInt       nv,m,n;
1267   PetscBool      flg;
1268 
1269   PetscFunctionBegin;
1270   if (ll != rr) {
1271     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1272     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1273   }
1274   if (!ll) PetscFunctionReturn(0);
1275 
1276   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1277   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1278 
1279   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1280   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1281 
1282   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1283 
1284   /* left diagonalscale the off-diagonal part */
1285   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1286 
1287   /* scale the diagonal part */
1288   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1289 
1290   /* right diagonalscale the off-diagonal part */
1291   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1292   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1293   PetscFunctionReturn(0);
1294 }
1295 
1296 #undef __FUNCT__
1297 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1298 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1299 {
1300   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1301   PetscErrorCode ierr;
1302 
1303   PetscFunctionBegin;
1304   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatEqual_MPISBAIJ"
1312 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1313 {
1314   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1315   Mat            a,b,c,d;
1316   PetscBool      flg;
1317   PetscErrorCode ierr;
1318 
1319   PetscFunctionBegin;
1320   a = matA->A; b = matA->B;
1321   c = matB->A; d = matB->B;
1322 
1323   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1324   if (flg) {
1325     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1326   }
1327   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatCopy_MPISBAIJ"
1333 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1334 {
1335   PetscErrorCode ierr;
1336   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1337   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1338 
1339   PetscFunctionBegin;
1340   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1341   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1342     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1343     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1344     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1345   } else {
1346     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1347     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1348   }
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1354 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1355 {
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1365 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1366 {
1367   PetscErrorCode ierr;
1368   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1369   PetscBLASInt   bnz,one=1;
1370   Mat_SeqSBAIJ   *xa,*ya;
1371   Mat_SeqBAIJ    *xb,*yb;
1372 
1373   PetscFunctionBegin;
1374   if (str == SAME_NONZERO_PATTERN) {
1375     PetscScalar alpha = a;
1376     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1377     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1378     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1379     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1380     xb   = (Mat_SeqBAIJ*)xx->B->data;
1381     yb   = (Mat_SeqBAIJ*)yy->B->data;
1382     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1383     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1384     ierr = PetscObjectStateIncrease((PetscObject)Y);CHKERRQ(ierr);
1385   } else {
1386     Mat      B;
1387     PetscInt *nnz_d,*nnz_o,bs=Y->rmap->bs;
1388     if (bs != X->rmap->bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Matrices must have same block size");
1389     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1390     ierr = MatGetRowUpperTriangular(Y);CHKERRQ(ierr);
1391     ierr = PetscMalloc1(yy->A->rmap->N,&nnz_d);CHKERRQ(ierr);
1392     ierr = PetscMalloc1(yy->B->rmap->N,&nnz_o);CHKERRQ(ierr);
1393     ierr = MatCreate(PetscObjectComm((PetscObject)Y),&B);CHKERRQ(ierr);
1394     ierr = PetscObjectSetName((PetscObject)B,((PetscObject)Y)->name);CHKERRQ(ierr);
1395     ierr = MatSetSizes(B,Y->rmap->n,Y->cmap->n,Y->rmap->N,Y->cmap->N);CHKERRQ(ierr);
1396     ierr = MatSetBlockSizesFromMats(B,Y,Y);CHKERRQ(ierr);
1397     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
1398     ierr = MatAXPYGetPreallocation_SeqSBAIJ(yy->A,xx->A,nnz_d);CHKERRQ(ierr);
1399     ierr = MatAXPYGetPreallocation_MPIBAIJ(yy->B,yy->garray,xx->B,xx->garray,nnz_o);CHKERRQ(ierr);
1400     ierr = MatMPISBAIJSetPreallocation(B,bs,0,nnz_d,0,nnz_o);CHKERRQ(ierr);
1401     ierr = MatAXPY_BasicWithPreallocation(B,Y,a,X,str);CHKERRQ(ierr);
1402     ierr = MatHeaderReplace(Y,B);CHKERRQ(ierr);
1403     ierr = PetscFree(nnz_d);CHKERRQ(ierr);
1404     ierr = PetscFree(nnz_o);CHKERRQ(ierr);
1405     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1406     ierr = MatRestoreRowUpperTriangular(Y);CHKERRQ(ierr);
1407   }
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1413 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1414 {
1415   PetscErrorCode ierr;
1416   PetscInt       i;
1417   PetscBool      flg;
1418 
1419   PetscFunctionBegin;
1420   for (i=0; i<n; i++) {
1421     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1422     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1423   }
1424   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 
1429 /* -------------------------------------------------------------------*/
1430 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1431                                        MatGetRow_MPISBAIJ,
1432                                        MatRestoreRow_MPISBAIJ,
1433                                        MatMult_MPISBAIJ,
1434                                /*  4*/ MatMultAdd_MPISBAIJ,
1435                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1436                                        MatMultAdd_MPISBAIJ,
1437                                        0,
1438                                        0,
1439                                        0,
1440                                /* 10*/ 0,
1441                                        0,
1442                                        0,
1443                                        MatSOR_MPISBAIJ,
1444                                        MatTranspose_MPISBAIJ,
1445                                /* 15*/ MatGetInfo_MPISBAIJ,
1446                                        MatEqual_MPISBAIJ,
1447                                        MatGetDiagonal_MPISBAIJ,
1448                                        MatDiagonalScale_MPISBAIJ,
1449                                        MatNorm_MPISBAIJ,
1450                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1451                                        MatAssemblyEnd_MPISBAIJ,
1452                                        MatSetOption_MPISBAIJ,
1453                                        MatZeroEntries_MPISBAIJ,
1454                                /* 24*/ 0,
1455                                        0,
1456                                        0,
1457                                        0,
1458                                        0,
1459                                /* 29*/ MatSetUp_MPISBAIJ,
1460                                        0,
1461                                        0,
1462                                        0,
1463                                        0,
1464                                /* 34*/ MatDuplicate_MPISBAIJ,
1465                                        0,
1466                                        0,
1467                                        0,
1468                                        0,
1469                                /* 39*/ MatAXPY_MPISBAIJ,
1470                                        MatGetSubMatrices_MPISBAIJ,
1471                                        MatIncreaseOverlap_MPISBAIJ,
1472                                        MatGetValues_MPISBAIJ,
1473                                        MatCopy_MPISBAIJ,
1474                                /* 44*/ 0,
1475                                        MatScale_MPISBAIJ,
1476                                        0,
1477                                        0,
1478                                        0,
1479                                /* 49*/ 0,
1480                                        0,
1481                                        0,
1482                                        0,
1483                                        0,
1484                                /* 54*/ 0,
1485                                        0,
1486                                        MatSetUnfactored_MPISBAIJ,
1487                                        0,
1488                                        MatSetValuesBlocked_MPISBAIJ,
1489                                /* 59*/ 0,
1490                                        0,
1491                                        0,
1492                                        0,
1493                                        0,
1494                                /* 64*/ 0,
1495                                        0,
1496                                        0,
1497                                        0,
1498                                        0,
1499                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1500                                        0,
1501                                        0,
1502                                        0,
1503                                        0,
1504                                /* 74*/ 0,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                        0,
1509                                /* 79*/ 0,
1510                                        0,
1511                                        0,
1512                                        0,
1513                                        MatLoad_MPISBAIJ,
1514                                /* 84*/ 0,
1515                                        0,
1516                                        0,
1517                                        0,
1518                                        0,
1519                                /* 89*/ 0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                        0,
1524                                /* 94*/ 0,
1525                                        0,
1526                                        0,
1527                                        0,
1528                                        0,
1529                                /* 99*/ 0,
1530                                        0,
1531                                        0,
1532                                        0,
1533                                        0,
1534                                /*104*/ 0,
1535                                        MatRealPart_MPISBAIJ,
1536                                        MatImaginaryPart_MPISBAIJ,
1537                                        MatGetRowUpperTriangular_MPISBAIJ,
1538                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1539                                /*109*/ 0,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                        0,
1544                                /*114*/ 0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                        0,
1549                                /*119*/ 0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                        0,
1554                                /*124*/ 0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                        0,
1559                                /*129*/ 0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                        0,
1564                                /*134*/ 0,
1565                                        0,
1566                                        0,
1567                                        0,
1568                                        0,
1569                                /*139*/ 0,
1570                                        0,
1571                                        0
1572 };
1573 
1574 
1575 #undef __FUNCT__
1576 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1577 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1578 {
1579   PetscFunctionBegin;
1580   *a = ((Mat_MPISBAIJ*)A->data)->A;
1581   PetscFunctionReturn(0);
1582 }
1583 
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1586 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1587 {
1588   Mat_MPISBAIJ   *b;
1589   PetscErrorCode ierr;
1590   PetscInt       i,mbs,Mbs;
1591 
1592   PetscFunctionBegin;
1593   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1594   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1595   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1596   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1597 
1598   b   = (Mat_MPISBAIJ*)B->data;
1599   mbs = B->rmap->n/bs;
1600   Mbs = B->rmap->N/bs;
1601   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1602 
1603   B->rmap->bs = bs;
1604   b->bs2      = bs*bs;
1605   b->mbs      = mbs;
1606   b->nbs      = mbs;
1607   b->Mbs      = Mbs;
1608   b->Nbs      = Mbs;
1609 
1610   for (i=0; i<=b->size; i++) {
1611     b->rangebs[i] = B->rmap->range[i]/bs;
1612   }
1613   b->rstartbs = B->rmap->rstart/bs;
1614   b->rendbs   = B->rmap->rend/bs;
1615 
1616   b->cstartbs = B->cmap->rstart/bs;
1617   b->cendbs   = B->cmap->rend/bs;
1618 
1619   if (!B->preallocated) {
1620     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1621     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1622     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1623     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1624     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1625     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1626     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1627     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1628     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1629   }
1630 
1631   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1632   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1633 
1634   B->preallocated = PETSC_TRUE;
1635   PetscFunctionReturn(0);
1636 }
1637 
1638 #undef __FUNCT__
1639 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1640 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1641 {
1642   PetscInt       m,rstart,cstart,cend;
1643   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1644   const PetscInt *JJ    =0;
1645   PetscScalar    *values=0;
1646   PetscErrorCode ierr;
1647 
1648   PetscFunctionBegin;
1649   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1650   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1651   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1652   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1653   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1654   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1655   m      = B->rmap->n/bs;
1656   rstart = B->rmap->rstart/bs;
1657   cstart = B->cmap->rstart/bs;
1658   cend   = B->cmap->rend/bs;
1659 
1660   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1661   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1662   for (i=0; i<m; i++) {
1663     nz = ii[i+1] - ii[i];
1664     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1665     nz_max = PetscMax(nz_max,nz);
1666     JJ     = jj + ii[i];
1667     for (j=0; j<nz; j++) {
1668       if (*JJ >= cstart) break;
1669       JJ++;
1670     }
1671     d = 0;
1672     for (; j<nz; j++) {
1673       if (*JJ++ >= cend) break;
1674       d++;
1675     }
1676     d_nnz[i] = d;
1677     o_nnz[i] = nz - d;
1678   }
1679   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1680   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1681 
1682   values = (PetscScalar*)V;
1683   if (!values) {
1684     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1685     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1686   }
1687   for (i=0; i<m; i++) {
1688     PetscInt          row    = i + rstart;
1689     PetscInt          ncols  = ii[i+1] - ii[i];
1690     const PetscInt    *icols = jj + ii[i];
1691     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1692     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1693   }
1694 
1695   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1696   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1697   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1698   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1699   PetscFunctionReturn(0);
1700 }
1701 
1702 #if defined(PETSC_HAVE_MUMPS)
1703 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1704 #endif
1705 #if defined(PETSC_HAVE_PASTIX)
1706 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1707 #endif
1708 
1709 /*MC
1710    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1711    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1712    the matrix is stored.
1713 
1714   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1715   can call MatSetOption(Mat, MAT_HERMITIAN);
1716 
1717    Options Database Keys:
1718 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1719 
1720   Level: beginner
1721 
1722 .seealso: MatCreateMPISBAIJ
1723 M*/
1724 
1725 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1726 
1727 #undef __FUNCT__
1728 #define __FUNCT__ "MatCreate_MPISBAIJ"
1729 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1730 {
1731   Mat_MPISBAIJ   *b;
1732   PetscErrorCode ierr;
1733   PetscBool      flg;
1734 
1735   PetscFunctionBegin;
1736   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1737   B->data = (void*)b;
1738   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1739 
1740   B->ops->destroy = MatDestroy_MPISBAIJ;
1741   B->ops->view    = MatView_MPISBAIJ;
1742   B->assembled    = PETSC_FALSE;
1743   B->insertmode   = NOT_SET_VALUES;
1744 
1745   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1746   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1747 
1748   /* build local table of row and column ownerships */
1749   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1750 
1751   /* build cache for off array entries formed */
1752   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1753 
1754   b->donotstash  = PETSC_FALSE;
1755   b->colmap      = NULL;
1756   b->garray      = NULL;
1757   b->roworiented = PETSC_TRUE;
1758 
1759   /* stuff used in block assembly */
1760   b->barray = 0;
1761 
1762   /* stuff used for matrix vector multiply */
1763   b->lvec    = 0;
1764   b->Mvctx   = 0;
1765   b->slvec0  = 0;
1766   b->slvec0b = 0;
1767   b->slvec1  = 0;
1768   b->slvec1a = 0;
1769   b->slvec1b = 0;
1770   b->sMvctx  = 0;
1771 
1772   /* stuff for MatGetRow() */
1773   b->rowindices   = 0;
1774   b->rowvalues    = 0;
1775   b->getrowactive = PETSC_FALSE;
1776 
1777   /* hash table stuff */
1778   b->ht           = 0;
1779   b->hd           = 0;
1780   b->ht_size      = 0;
1781   b->ht_flag      = PETSC_FALSE;
1782   b->ht_fact      = 0;
1783   b->ht_total_ct  = 0;
1784   b->ht_insert_ct = 0;
1785 
1786   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1787   b->ijonly = PETSC_FALSE;
1788 
1789   b->in_loc = 0;
1790   b->v_loc  = 0;
1791   b->n_loc  = 0;
1792   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1793   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1794   if (flg) {
1795     PetscReal fact = 1.39;
1796     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1797     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1798     if (fact <= 1.0) fact = 1.39;
1799     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1800     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1801   }
1802   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1803 
1804 #if defined(PETSC_HAVE_PASTIX)
1805   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1806 #endif
1807 #if defined(PETSC_HAVE_MUMPS)
1808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1809 #endif
1810   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1811   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1812   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1813   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1815   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1816 
1817   B->symmetric                  = PETSC_TRUE;
1818   B->structurally_symmetric     = PETSC_TRUE;
1819   B->symmetric_set              = PETSC_TRUE;
1820   B->structurally_symmetric_set = PETSC_TRUE;
1821 
1822   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1823   PetscFunctionReturn(0);
1824 }
1825 
1826 /*MC
1827    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1828 
1829    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1830    and MATMPISBAIJ otherwise.
1831 
1832    Options Database Keys:
1833 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1834 
1835   Level: beginner
1836 
1837 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1838 M*/
1839 
1840 #undef __FUNCT__
1841 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1842 /*@C
1843    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1844    the user should preallocate the matrix storage by setting the parameters
1845    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1846    performance can be increased by more than a factor of 50.
1847 
1848    Collective on Mat
1849 
1850    Input Parameters:
1851 +  B - the matrix
1852 .  bs   - size of blockk
1853 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1854            submatrix  (same for all local rows)
1855 .  d_nnz - array containing the number of block nonzeros in the various block rows
1856            in the upper triangular and diagonal part of the in diagonal portion of the local
1857            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1858            for the diagonal entry and set a value even if it is zero.
1859 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1860            submatrix (same for all local rows).
1861 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1862            off-diagonal portion of the local submatrix that is right of the diagonal
1863            (possibly different for each block row) or NULL.
1864 
1865 
1866    Options Database Keys:
1867 .   -mat_no_unroll - uses code that does not unroll the loops in the
1868                      block calculations (much slower)
1869 .   -mat_block_size - size of the blocks to use
1870 
1871    Notes:
1872 
1873    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1874    than it must be used on all processors that share the object for that argument.
1875 
1876    If the *_nnz parameter is given then the *_nz parameter is ignored
1877 
1878    Storage Information:
1879    For a square global matrix we define each processor's diagonal portion
1880    to be its local rows and the corresponding columns (a square submatrix);
1881    each processor's off-diagonal portion encompasses the remainder of the
1882    local matrix (a rectangular submatrix).
1883 
1884    The user can specify preallocated storage for the diagonal part of
1885    the local submatrix with either d_nz or d_nnz (not both).  Set
1886    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1887    memory allocation.  Likewise, specify preallocated storage for the
1888    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1889 
1890    You can call MatGetInfo() to get information on how effective the preallocation was;
1891    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1892    You can also run with the option -info and look for messages with the string
1893    malloc in them to see if additional memory allocation was needed.
1894 
1895    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1896    the figure below we depict these three local rows and all columns (0-11).
1897 
1898 .vb
1899            0 1 2 3 4 5 6 7 8 9 10 11
1900           --------------------------
1901    row 3  |. . . d d d o o o o  o  o
1902    row 4  |. . . d d d o o o o  o  o
1903    row 5  |. . . d d d o o o o  o  o
1904           --------------------------
1905 .ve
1906 
1907    Thus, any entries in the d locations are stored in the d (diagonal)
1908    submatrix, and any entries in the o locations are stored in the
1909    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1910    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1911 
1912    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1913    plus the diagonal part of the d matrix,
1914    and o_nz should indicate the number of block nonzeros per row in the o matrix
1915 
1916    In general, for PDE problems in which most nonzeros are near the diagonal,
1917    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1918    or you will get TERRIBLE performance; see the users' manual chapter on
1919    matrices.
1920 
1921    Level: intermediate
1922 
1923 .keywords: matrix, block, aij, compressed row, sparse, parallel
1924 
1925 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1926 @*/
1927 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1928 {
1929   PetscErrorCode ierr;
1930 
1931   PetscFunctionBegin;
1932   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1933   PetscValidType(B,1);
1934   PetscValidLogicalCollectiveInt(B,bs,2);
1935   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1936   PetscFunctionReturn(0);
1937 }
1938 
1939 #undef __FUNCT__
1940 #define __FUNCT__ "MatCreateSBAIJ"
1941 /*@C
1942    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1943    (block compressed row).  For good matrix assembly performance
1944    the user should preallocate the matrix storage by setting the parameters
1945    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1946    performance can be increased by more than a factor of 50.
1947 
1948    Collective on MPI_Comm
1949 
1950    Input Parameters:
1951 +  comm - MPI communicator
1952 .  bs   - size of blockk
1953 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1954            This value should be the same as the local size used in creating the
1955            y vector for the matrix-vector product y = Ax.
1956 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1957            This value should be the same as the local size used in creating the
1958            x vector for the matrix-vector product y = Ax.
1959 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1960 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1961 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1962            submatrix  (same for all local rows)
1963 .  d_nnz - array containing the number of block nonzeros in the various block rows
1964            in the upper triangular portion of the in diagonal portion of the local
1965            (possibly different for each block block row) or NULL.
1966            If you plan to factor the matrix you must leave room for the diagonal entry and
1967            set its value even if it is zero.
1968 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1969            submatrix (same for all local rows).
1970 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1971            off-diagonal portion of the local submatrix (possibly different for
1972            each block row) or NULL.
1973 
1974    Output Parameter:
1975 .  A - the matrix
1976 
1977    Options Database Keys:
1978 .   -mat_no_unroll - uses code that does not unroll the loops in the
1979                      block calculations (much slower)
1980 .   -mat_block_size - size of the blocks to use
1981 .   -mat_mpi - use the parallel matrix data structures even on one processor
1982                (defaults to using SeqBAIJ format on one processor)
1983 
1984    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1985    MatXXXXSetPreallocation() paradgm instead of this routine directly.
1986    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1987 
1988    Notes:
1989    The number of rows and columns must be divisible by blocksize.
1990    This matrix type does not support complex Hermitian operation.
1991 
1992    The user MUST specify either the local or global matrix dimensions
1993    (possibly both).
1994 
1995    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1996    than it must be used on all processors that share the object for that argument.
1997 
1998    If the *_nnz parameter is given then the *_nz parameter is ignored
1999 
2000    Storage Information:
2001    For a square global matrix we define each processor's diagonal portion
2002    to be its local rows and the corresponding columns (a square submatrix);
2003    each processor's off-diagonal portion encompasses the remainder of the
2004    local matrix (a rectangular submatrix).
2005 
2006    The user can specify preallocated storage for the diagonal part of
2007    the local submatrix with either d_nz or d_nnz (not both).  Set
2008    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2009    memory allocation.  Likewise, specify preallocated storage for the
2010    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2011 
2012    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2013    the figure below we depict these three local rows and all columns (0-11).
2014 
2015 .vb
2016            0 1 2 3 4 5 6 7 8 9 10 11
2017           --------------------------
2018    row 3  |. . . d d d o o o o  o  o
2019    row 4  |. . . d d d o o o o  o  o
2020    row 5  |. . . d d d o o o o  o  o
2021           --------------------------
2022 .ve
2023 
2024    Thus, any entries in the d locations are stored in the d (diagonal)
2025    submatrix, and any entries in the o locations are stored in the
2026    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2027    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2028 
2029    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2030    plus the diagonal part of the d matrix,
2031    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2032    In general, for PDE problems in which most nonzeros are near the diagonal,
2033    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2034    or you will get TERRIBLE performance; see the users' manual chapter on
2035    matrices.
2036 
2037    Level: intermediate
2038 
2039 .keywords: matrix, block, aij, compressed row, sparse, parallel
2040 
2041 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2042 @*/
2043 
2044 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2045 {
2046   PetscErrorCode ierr;
2047   PetscMPIInt    size;
2048 
2049   PetscFunctionBegin;
2050   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2051   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2052   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2053   if (size > 1) {
2054     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2055     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2056   } else {
2057     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2058     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2059   }
2060   PetscFunctionReturn(0);
2061 }
2062 
2063 
2064 #undef __FUNCT__
2065 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2066 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2067 {
2068   Mat            mat;
2069   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2070   PetscErrorCode ierr;
2071   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2072   PetscScalar    *array;
2073 
2074   PetscFunctionBegin;
2075   *newmat = 0;
2076 
2077   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2078   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2079   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2080   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2081   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2082   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2083 
2084   mat->factortype   = matin->factortype;
2085   mat->preallocated = PETSC_TRUE;
2086   mat->assembled    = PETSC_TRUE;
2087   mat->insertmode   = NOT_SET_VALUES;
2088 
2089   a      = (Mat_MPISBAIJ*)mat->data;
2090   a->bs2 = oldmat->bs2;
2091   a->mbs = oldmat->mbs;
2092   a->nbs = oldmat->nbs;
2093   a->Mbs = oldmat->Mbs;
2094   a->Nbs = oldmat->Nbs;
2095 
2096 
2097   a->size         = oldmat->size;
2098   a->rank         = oldmat->rank;
2099   a->donotstash   = oldmat->donotstash;
2100   a->roworiented  = oldmat->roworiented;
2101   a->rowindices   = 0;
2102   a->rowvalues    = 0;
2103   a->getrowactive = PETSC_FALSE;
2104   a->barray       = 0;
2105   a->rstartbs     = oldmat->rstartbs;
2106   a->rendbs       = oldmat->rendbs;
2107   a->cstartbs     = oldmat->cstartbs;
2108   a->cendbs       = oldmat->cendbs;
2109 
2110   /* hash table stuff */
2111   a->ht           = 0;
2112   a->hd           = 0;
2113   a->ht_size      = 0;
2114   a->ht_flag      = oldmat->ht_flag;
2115   a->ht_fact      = oldmat->ht_fact;
2116   a->ht_total_ct  = 0;
2117   a->ht_insert_ct = 0;
2118 
2119   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2120   if (oldmat->colmap) {
2121 #if defined(PETSC_USE_CTABLE)
2122     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2123 #else
2124     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2125     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2126     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2127 #endif
2128   } else a->colmap = 0;
2129 
2130   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2131     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2132     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2133     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2134   } else a->garray = 0;
2135 
2136   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2137   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2139   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2140   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2141 
2142   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2143   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2144   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2145   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2146 
2147   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2148   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2149   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2150   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2151   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2152   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2153   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2154   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2155   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2156   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2157   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2158   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2159   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2160 
2161   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2162   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2163   a->sMvctx = oldmat->sMvctx;
2164   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2165 
2166   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2167   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2168   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2169   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2170   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2171   *newmat = mat;
2172   PetscFunctionReturn(0);
2173 }
2174 
2175 #undef __FUNCT__
2176 #define __FUNCT__ "MatLoad_MPISBAIJ"
2177 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2178 {
2179   PetscErrorCode ierr;
2180   PetscInt       i,nz,j,rstart,rend;
2181   PetscScalar    *vals,*buf;
2182   MPI_Comm       comm;
2183   MPI_Status     status;
2184   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2185   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2186   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2187   PetscInt       bs = newmat->rmap->bs,Mbs,mbs,extra_rows;
2188   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2189   PetscInt       dcount,kmax,k,nzcount,tmp;
2190   int            fd;
2191 
2192   PetscFunctionBegin;
2193   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2194   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2195   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2196   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2197   if (bs < 0) bs = 1;
2198 
2199   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2200   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2201   if (!rank) {
2202     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2203     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2204     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2205     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2206   }
2207 
2208   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2209   M    = header[1];
2210   N    = header[2];
2211 
2212   /* If global sizes are set, check if they are consistent with that given in the file */
2213   if (newmat->rmap->N >= 0 && newmat->rmap->N != M) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%D) and input matrix has (%D)",newmat->rmap->N,M);
2214   if (newmat->cmap->N >= 0 && newmat->cmap->N != N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%D) and input matrix has (%D)",newmat->cmap->N,N);
2215 
2216   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2217 
2218   /*
2219      This code adds extra rows to make sure the number of rows is
2220      divisible by the blocksize
2221   */
2222   Mbs        = M/bs;
2223   extra_rows = bs - M + bs*(Mbs);
2224   if (extra_rows == bs) extra_rows = 0;
2225   else                  Mbs++;
2226   if (extra_rows &&!rank) {
2227     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2228   }
2229 
2230   /* determine ownership of all rows */
2231   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2232     mbs = Mbs/size + ((Mbs % size) > rank);
2233     m   = mbs*bs;
2234   } else { /* User Set */
2235     m   = newmat->rmap->n;
2236     mbs = m/bs;
2237   }
2238   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2239   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2240   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2241   rowners[0] = 0;
2242   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2243   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2244   rstart = rowners[rank];
2245   rend   = rowners[rank+1];
2246 
2247   /* distribute row lengths to all processors */
2248   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2249   if (!rank) {
2250     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2251     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2252     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2253     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2254     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2255     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2256     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2257   } else {
2258     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2259   }
2260 
2261   if (!rank) {   /* procs[0] */
2262     /* calculate the number of nonzeros on each processor */
2263     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2264     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2265     for (i=0; i<size; i++) {
2266       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2267         procsnz[i] += rowlengths[j];
2268       }
2269     }
2270     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2271 
2272     /* determine max buffer needed and allocate it */
2273     maxnz = 0;
2274     for (i=0; i<size; i++) {
2275       maxnz = PetscMax(maxnz,procsnz[i]);
2276     }
2277     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2278 
2279     /* read in my part of the matrix column indices  */
2280     nz     = procsnz[0];
2281     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2282     mycols = ibuf;
2283     if (size == 1) nz -= extra_rows;
2284     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2285     if (size == 1) {
2286       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2287     }
2288 
2289     /* read in every ones (except the last) and ship off */
2290     for (i=1; i<size-1; i++) {
2291       nz   = procsnz[i];
2292       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2293       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2294     }
2295     /* read in the stuff for the last proc */
2296     if (size != 1) {
2297       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2298       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2299       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2300       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2301     }
2302     ierr = PetscFree(cols);CHKERRQ(ierr);
2303   } else {  /* procs[i], i>0 */
2304     /* determine buffer space needed for message */
2305     nz = 0;
2306     for (i=0; i<m; i++) nz += locrowlens[i];
2307     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2308     mycols = ibuf;
2309     /* receive message of column indices*/
2310     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2311     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2312     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2313   }
2314 
2315   /* loop over local rows, determining number of off diagonal entries */
2316   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2317   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2318   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2319   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2320   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2321   rowcount = 0;
2322   nzcount  = 0;
2323   for (i=0; i<mbs; i++) {
2324     dcount  = 0;
2325     odcount = 0;
2326     for (j=0; j<bs; j++) {
2327       kmax = locrowlens[rowcount];
2328       for (k=0; k<kmax; k++) {
2329         tmp = mycols[nzcount++]/bs; /* block col. index */
2330         if (!mask[tmp]) {
2331           mask[tmp] = 1;
2332           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2333           else masked1[dcount++] = tmp; /* entry in diag portion */
2334         }
2335       }
2336       rowcount++;
2337     }
2338 
2339     dlens[i]  = dcount;  /* d_nzz[i] */
2340     odlens[i] = odcount; /* o_nzz[i] */
2341 
2342     /* zero out the mask elements we set */
2343     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2344     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2345   }
2346   ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2347   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2348   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2349 
2350   if (!rank) {
2351     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2352     /* read in my part of the matrix numerical values  */
2353     nz     = procsnz[0];
2354     vals   = buf;
2355     mycols = ibuf;
2356     if (size == 1) nz -= extra_rows;
2357     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2358     if (size == 1) {
2359       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2360     }
2361 
2362     /* insert into matrix */
2363     jj = rstart*bs;
2364     for (i=0; i<m; i++) {
2365       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2366       mycols += locrowlens[i];
2367       vals   += locrowlens[i];
2368       jj++;
2369     }
2370 
2371     /* read in other processors (except the last one) and ship out */
2372     for (i=1; i<size-1; i++) {
2373       nz   = procsnz[i];
2374       vals = buf;
2375       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2376       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2377     }
2378     /* the last proc */
2379     if (size != 1) {
2380       nz   = procsnz[i] - extra_rows;
2381       vals = buf;
2382       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2383       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2384       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2385     }
2386     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2387 
2388   } else {
2389     /* receive numeric values */
2390     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2391 
2392     /* receive message of values*/
2393     vals   = buf;
2394     mycols = ibuf;
2395     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2396     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2397     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2398 
2399     /* insert into matrix */
2400     jj = rstart*bs;
2401     for (i=0; i<m; i++) {
2402       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2403       mycols += locrowlens[i];
2404       vals   += locrowlens[i];
2405       jj++;
2406     }
2407   }
2408 
2409   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2410   ierr = PetscFree(buf);CHKERRQ(ierr);
2411   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2412   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2413   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2414   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2415   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2416   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2417   PetscFunctionReturn(0);
2418 }
2419 
2420 #undef __FUNCT__
2421 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2422 /*XXXXX@
2423    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2424 
2425    Input Parameters:
2426 .  mat  - the matrix
2427 .  fact - factor
2428 
2429    Not Collective on Mat, each process can have a different hash factor
2430 
2431    Level: advanced
2432 
2433   Notes:
2434    This can also be set by the command line option: -mat_use_hash_table fact
2435 
2436 .keywords: matrix, hashtable, factor, HT
2437 
2438 .seealso: MatSetOption()
2439 @XXXXX*/
2440 
2441 
2442 #undef __FUNCT__
2443 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2444 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2445 {
2446   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2447   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2448   PetscReal      atmp;
2449   PetscReal      *work,*svalues,*rvalues;
2450   PetscErrorCode ierr;
2451   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2452   PetscMPIInt    rank,size;
2453   PetscInt       *rowners_bs,dest,count,source;
2454   PetscScalar    *va;
2455   MatScalar      *ba;
2456   MPI_Status     stat;
2457 
2458   PetscFunctionBegin;
2459   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2460   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2461   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2462 
2463   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2464   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2465 
2466   bs  = A->rmap->bs;
2467   mbs = a->mbs;
2468   Mbs = a->Mbs;
2469   ba  = b->a;
2470   bi  = b->i;
2471   bj  = b->j;
2472 
2473   /* find ownerships */
2474   rowners_bs = A->rmap->range;
2475 
2476   /* each proc creates an array to be distributed */
2477   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2478   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2479 
2480   /* row_max for B */
2481   if (rank != size-1) {
2482     for (i=0; i<mbs; i++) {
2483       ncols = bi[1] - bi[0]; bi++;
2484       brow  = bs*i;
2485       for (j=0; j<ncols; j++) {
2486         bcol = bs*(*bj);
2487         for (kcol=0; kcol<bs; kcol++) {
2488           col  = bcol + kcol;                /* local col index */
2489           col += rowners_bs[rank+1];      /* global col index */
2490           for (krow=0; krow<bs; krow++) {
2491             atmp = PetscAbsScalar(*ba); ba++;
2492             row  = brow + krow;   /* local row index */
2493             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2494             if (work[col] < atmp) work[col] = atmp;
2495           }
2496         }
2497         bj++;
2498       }
2499     }
2500 
2501     /* send values to its owners */
2502     for (dest=rank+1; dest<size; dest++) {
2503       svalues = work + rowners_bs[dest];
2504       count   = rowners_bs[dest+1]-rowners_bs[dest];
2505       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2506     }
2507   }
2508 
2509   /* receive values */
2510   if (rank) {
2511     rvalues = work;
2512     count   = rowners_bs[rank+1]-rowners_bs[rank];
2513     for (source=0; source<rank; source++) {
2514       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2515       /* process values */
2516       for (i=0; i<count; i++) {
2517         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2518       }
2519     }
2520   }
2521 
2522   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2523   ierr = PetscFree(work);CHKERRQ(ierr);
2524   PetscFunctionReturn(0);
2525 }
2526 
2527 #undef __FUNCT__
2528 #define __FUNCT__ "MatSOR_MPISBAIJ"
2529 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2530 {
2531   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2532   PetscErrorCode    ierr;
2533   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2534   PetscScalar       *x,*ptr,*from;
2535   Vec               bb1;
2536   const PetscScalar *b;
2537 
2538   PetscFunctionBegin;
2539   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2540   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2541 
2542   if (flag == SOR_APPLY_UPPER) {
2543     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2544     PetscFunctionReturn(0);
2545   }
2546 
2547   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2548     if (flag & SOR_ZERO_INITIAL_GUESS) {
2549       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2550       its--;
2551     }
2552 
2553     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2554     while (its--) {
2555 
2556       /* lower triangular part: slvec0b = - B^T*xx */
2557       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2558 
2559       /* copy xx into slvec0a */
2560       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2561       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2562       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2563       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2564 
2565       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2566 
2567       /* copy bb into slvec1a */
2568       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2569       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2570       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2571       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2572 
2573       /* set slvec1b = 0 */
2574       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2575 
2576       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2577       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2578       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2579       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2580 
2581       /* upper triangular part: bb1 = bb1 - B*x */
2582       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2583 
2584       /* local diagonal sweep */
2585       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2586     }
2587     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2588   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2589     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2590   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2591     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2592   } else if (flag & SOR_EISENSTAT) {
2593     Vec               xx1;
2594     PetscBool         hasop;
2595     const PetscScalar *diag;
2596     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2597     PetscInt          i,n;
2598 
2599     if (!mat->xx1) {
2600       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2601       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2602     }
2603     xx1 = mat->xx1;
2604     bb1 = mat->bb1;
2605 
2606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2607 
2608     if (!mat->diag) {
2609       /* this is wrong for same matrix with new nonzero values */
2610       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2611       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2612     }
2613     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2614 
2615     if (hasop) {
2616       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2617       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2618     } else {
2619       /*
2620           These two lines are replaced by code that may be a bit faster for a good compiler
2621       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2622       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2623       */
2624       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2625       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2626       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2627       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2628       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2629       if (omega == 1.0) {
2630         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2631         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2632       } else {
2633         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2634         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2635       }
2636       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2637       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2638       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2639       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2640     }
2641 
2642     /* multiply off-diagonal portion of matrix */
2643     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2644     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2645     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2646     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2647     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2648     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2649     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2650     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2651     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2652     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2653 
2654     /* local sweep */
2655     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2656     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2657   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2658   PetscFunctionReturn(0);
2659 }
2660 
2661 #undef __FUNCT__
2662 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2663 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2664 {
2665   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2666   PetscErrorCode ierr;
2667   Vec            lvec1,bb1;
2668 
2669   PetscFunctionBegin;
2670   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2671   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2672 
2673   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2674     if (flag & SOR_ZERO_INITIAL_GUESS) {
2675       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2676       its--;
2677     }
2678 
2679     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2680     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2681     while (its--) {
2682       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2683 
2684       /* lower diagonal part: bb1 = bb - B^T*xx */
2685       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2686       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2687 
2688       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2689       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2690       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2691 
2692       /* upper diagonal part: bb1 = bb1 - B*x */
2693       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2694       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2695 
2696       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2697 
2698       /* diagonal sweep */
2699       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2700     }
2701     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2702     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2703   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2704   PetscFunctionReturn(0);
2705 }
2706 
2707 #undef __FUNCT__
2708 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2709 /*@
2710      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2711          CSR format the local rows.
2712 
2713    Collective on MPI_Comm
2714 
2715    Input Parameters:
2716 +  comm - MPI communicator
2717 .  bs - the block size, only a block size of 1 is supported
2718 .  m - number of local rows (Cannot be PETSC_DECIDE)
2719 .  n - This value should be the same as the local size used in creating the
2720        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2721        calculated if N is given) For square matrices n is almost always m.
2722 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2723 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2724 .   i - row indices
2725 .   j - column indices
2726 -   a - matrix values
2727 
2728    Output Parameter:
2729 .   mat - the matrix
2730 
2731    Level: intermediate
2732 
2733    Notes:
2734        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2735      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2736      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2737 
2738        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2739 
2740 .keywords: matrix, aij, compressed row, sparse, parallel
2741 
2742 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2743           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2744 @*/
2745 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2746 {
2747   PetscErrorCode ierr;
2748 
2749 
2750   PetscFunctionBegin;
2751   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2752   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2753   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2754   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2755   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2756   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2757   PetscFunctionReturn(0);
2758 }
2759 
2760 
2761 #undef __FUNCT__
2762 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2763 /*@C
2764    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2765    (the default parallel PETSc format).
2766 
2767    Collective on MPI_Comm
2768 
2769    Input Parameters:
2770 +  B - the matrix
2771 .  bs - the block size
2772 .  i - the indices into j for the start of each local row (starts with zero)
2773 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2774 -  v - optional values in the matrix
2775 
2776    Level: developer
2777 
2778 .keywords: matrix, aij, compressed row, sparse, parallel
2779 
2780 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2781 @*/
2782 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2783 {
2784   PetscErrorCode ierr;
2785 
2786   PetscFunctionBegin;
2787   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2788   PetscFunctionReturn(0);
2789 }
2790 
2791 
2792