xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d1da0b6977ee87f1a77143268e9e8db764d4e8b3)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
25 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
32   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
38 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
39 {
40   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
41   PetscErrorCode ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
45   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
/* Insert or add the scalar 'value' at LOCAL point indices (row,col) of the
   diagonal (SeqSBAIJ) block A.  This macro expects the following variables to
   be declared in the enclosing scope (see MatSetValues_MPISBAIJ):
     bs,bs2,ai,aj,aa,aimax,ailen,a,A  - block size and CSR arrays of A
     rp,ap,bap,rmax,nrow,low,high,t,_i,ii,N,brow,bcol,ridx,cidx,ierr - scratch
   A binary search narrows the candidate window of block columns, then a short
   linear scan locates the target; if the block is absent and a->nonew allows
   insertion, the row is (re)allocated and later entries shifted up. */
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* binary search: shrink [low,high) until at most 3 candidates remain */ \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block column found: values stored column-major within the bs x bs block */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    /* block not present: honor the no-new-nonzero settings */ \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    /* zero the freshly opened slot before writing the single entry */ \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
a_noinsert:; \
    ailen[brow] = nrow; \
  }
87 
/* Insert or add the scalar 'value' at (row,col) of the off-diagonal (SeqBAIJ)
   block B.  Identical search/insert scheme to MatSetValues_SeqSBAIJ_A_Private,
   but operates on B's CSR arrays (bi,bj,ba,bimax,bilen) and jumps to the
   b_noinsert label.  Expects the same scratch variables in the enclosing
   scope (see MatSetValues_MPISBAIJ). */
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    /* binary search: shrink [low,high) until at most 3 candidates remain */ \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        /* block column found: values stored column-major within the bs x bs block */ \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    /* block not present: honor the no-new-nonzero settings */ \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    /* zero the freshly opened slot before writing the single entry */ \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
b_noinsert:; \
    bilen[brow] = nrow; \
  }
125 
/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
/*
   MatSetValues_MPISBAIJ - Inserts or adds point values into an MPISBAIJ
   matrix.  Rows owned by this process are written directly into the local
   A (diagonal) or B (off-diagonal) block via the _Private macros above;
   rows owned by other processes are stashed for communication during
   assembly.  Lower-triangular blocks are skipped (or raise an error,
   depending on a->ignore_ltriangular).
*/
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some Variables required in the macro */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash) {
    /* grow the persistent scratch buffers used to filter stashed rows */
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative row indices are silently skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        /* only the upper block-triangle is stored in SBAIJ */
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            /* after first assembly B uses compacted column indices; translate
               the global column through the colmap (1-based entries so that 0
               means "not present") */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              /* new off-diagonal block column: fall back to global indexing */
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        /* filter out lower-triangular blocks before stashing the row */
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
249 
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
/*
   MatSetValuesBlocked_MPISBAIJ - Inserts or adds bs x bs blocks of values,
   indexed by BLOCK row/column.  Locally owned block rows go into A or B;
   remote block rows are stashed in mat->bstash.  Lower-triangular blocks
   (im[i] > in[j]) are skipped or raise an error per ignore_ltriangular.

   NOTE: cstart/cend are taken from rstartbs/rendbs — valid only because an
   SBAIJ matrix is square with identical row and column layouts.
*/
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;   /* persistent bs2 scratch block */
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    /* lazily allocate the scratch block used to reorder user input */
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  /* distance (in scalars) between consecutive rows/cols of one input block */
  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;   /* negative block-row indices are skipped */
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If NumCol = 1 then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          /* gather the bs x bs block contiguously into barray */
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;   /* rewind to the start of the copied block */
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            /* translate global block column through the (1-based) colmap */
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            /* NOTE(review): a not-found colmap lookup yields col = (0-1)/bs,
               which truncates to 0 for bs>1, so this col<0 test may never
               fire for bs>1 — verify against upstream PETSc */
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        /* remote block row: stash for communication during assembly */
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
355 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/*
   MatGetValues_MPISBAIJ - Retrieves values at the given global (point)
   indices into v (row-major: v[i*n+j]).  Only locally owned rows are
   supported; off-diagonal columns are looked up through the colmap and
   return 0.0 when the corresponding block is not stored locally.
*/
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;   /* local point row */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          /* column lies in the diagonal block A */
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column lies in the off-diagonal block B: map the global block
             column through the (1-based) colmap */
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* not stored locally -> the entry is an (implicit) zero */
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}
398 
#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
/*
   MatNorm_MPISBAIJ - Computes a norm of the parallel symmetric matrix.
   Frobenius: sqrt(||A||_F^2 + 2*||B||_F^2) reduced over all ranks — the
   factor 2 accounts for each off-diagonal (B) entry representing both (i,j)
   and its symmetric mate (j,i).  NORM_1 and NORM_INFINITY coincide for a
   symmetric matrix and are computed as the max column sum of the explicit
   upper part plus the implicit transposed contributions.
*/
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    /* uniprocessor: everything lives in A */
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      /* rsum accumulates local column sums over all global columns;
         rsum2 receives the MPI-reduced totals */
      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              /* NOTE(review): 'bcol > 0' assumes the diagonal block is the
                 first stored entry of each row — TODO confirm that holds
                 for rows whose diagonal block is structurally absent */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat: every block contributes to both its row and its (symmetric) column */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;   /* garray maps compact to global block col */
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
476 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
477 {
478   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
479   PetscErrorCode ierr;
480   PetscInt       nstash,reallocs;
481   InsertMode     addv;
482 
483   PetscFunctionBegin;
484   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
485 
486   /* make sure all processors are either in INSERTMODE or ADDMODE */
487   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
488   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
489   mat->insertmode = addv; /* in case this processor had no cache */
490 
491   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
492   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
493   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
494   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
495   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
496   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
/*
   MatAssemblyEnd_MPISBAIJ - Completes assembly: drains the point and block
   stashes (re-dispatching received values through MatSetValues[Blocked]_
   MPISBAIJ), assembles the local A and B blocks, coordinates disassembly
   across ranks when the nonzero structure may have changed, and sets up the
   scatter contexts on first final assembly.
*/
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    /* drain the point stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block occupies bs2 scalars in val */
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* invalidate cached row workspace from MatGetRow */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;
  PetscFunctionReturn(0);
}
596 
597 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
598 #include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   MatView_MPISBAIJ_ASCIIorDraworSocket - Views an MPISBAIJ matrix for ASCII,
   draw, socket and binary viewers.  Short-format ASCII output is printed in
   place; otherwise the whole matrix is gathered onto process 0 (as a new
   MPISBAIJ matrix with all rows on rank 0) and viewed there through a
   singleton viewer.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero statistics plus the scatter context */
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* nothing to do for a null draw context */
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals = the bs global point rows covered by block row i */
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* one column of the bs x bs block per call */
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part (global columns via garray; uses the BAIJ
       setter since B is not symmetric-format) */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
      ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "MatView_MPISBAIJ"
724 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
725 {
726   PetscErrorCode ierr;
727   PetscBool      iascii,isdraw,issocket,isbinary;
728 
729   PetscFunctionBegin;
730   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
731   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
732   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
733   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
734   if (iascii || isdraw || issocket || isbinary) {
735     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
736   }
737   PetscFunctionReturn(0);
738 }
739 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   Releases all storage owned by a parallel SBAIJ matrix: the two local
   blocks, the scatter contexts and work vectors used by the mult/SOR
   kernels, the lookup and work arrays, and finally the Mat_MPISBAIJ
   structure itself; then detaches the composed functions so the object
   can be retyped cleanly.
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  /* stashes that collected off-process entries during assembly */
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  /* local diagonal (SBAIJ) and off-diagonal (BAIJ) blocks */
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
  /* global-to-local column map; hash table or flat array per configuration */
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  /* scatters and the slvec0/slvec1 work vectors used by MatMult/MatMultAdd */
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  /* scratch storage for MatGetRow, block insertion, and hash-table lookup */
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  /* vectors used by MatSOR */
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  /* remove the type-specific composed functions installed at creation */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
791 
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
/*
   yy = A*xx for a Hermitian MPISBAIJ matrix using the single-communication
   scheme: the local result of A*x and of B^H*x are packed into one long
   vector (slvec0 -> slvec1) so that only one VecScatter is needed to add
   the lower-triangle (conjugate-transpose) contributions.
*/
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part: slvec1a = A_diag * x; zero the ghost segment slvec1b */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part: slvec0b = B^H * x (conjugate transpose, hence Hermitian) */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  /* NOTE(review): copy length uses sizeof(MatScalar) although both arrays are
     PetscScalar; these differ when PETSC_USE_REAL_MAT_SINGLE is set -- verify */
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  /* one combined scatter adds both the x ghosts and the B^H*x contributions */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part: yy = B*(ghost x) + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
826 
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
/*
   yy = A*xx for a symmetric MPISBAIJ matrix using the single-communication
   scheme: only the upper triangle is stored, so the lower-triangle
   contribution is produced as B^T*x and folded into the same scatter that
   delivers the ghost values of x (slvec0 -> slvec1).
*/
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part: slvec1a = A_diag * x; zero the ghost segment slvec1b */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part: slvec0b = B^T * x */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  /* NOTE(review): copy length uses sizeof(MatScalar) although both arrays are
     PetscScalar; these differ when PETSC_USE_REAL_MAT_SINGLE is set -- verify */
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  /* one combined scatter adds both the x ghosts and the B^T*x contributions */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part: yy = B*(ghost x) + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
864 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
865 {
866   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
867   PetscErrorCode ierr;
868   PetscInt       nt;
869 
870   PetscFunctionBegin;
871   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
872   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
873 
874   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
875   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
876 
877   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
878   /* do diagonal part */
879   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
880   /* do supperdiagonal part */
881   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
882   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
883   /* do subdiagonal part */
884   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
885   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
886   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
887   PetscFunctionReturn(0);
888 }
889 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
/*
   zz = yy + A*xx using the same single-communication scheme as
   MatMult_MPISBAIJ: yy is folded in via the diagonal-block multadd and the
   lower-triangle contribution arrives through the combined slvec scatter.
*/
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part: slvec1a = A_diag*x + y; zero the ghost segment slvec1b */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part: slvec0b = B^T * x */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  /* NOTE(review): copy length uses sizeof(MatScalar) although both arrays are
     PetscScalar; these differ when PETSC_USE_REAL_MAT_SINGLE is set -- verify */
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  /* combined scatter delivers x ghosts and accumulates the B^T*x terms */
  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part: zz = B*(ghost x) + slvec1a */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
925 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   zz = yy + A*xx using the two-communication scheme: the off-diagonal
   block is applied both forward (on ghost values of x) and transposed
   (scatter-added back in reverse), mirroring MatMult_MPISBAIJ_2comm.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part: zz = A_diag*x + y (overlaps with the scatter above) */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part: zz += B * (ghost x) */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part: scatter-add B^T*x back to the owning processes */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
947 
948 /*
949   This only works correctly for square matrices where the subblock A->A is the
950    diagonal block
951 */
952 #undef __FUNCT__
953 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
954 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
955 {
956   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
957   PetscErrorCode ierr;
958 
959   PetscFunctionBegin;
960   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
961   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatScale_MPISBAIJ"
967 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
968 {
969   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
970   PetscErrorCode ierr;
971 
972   PetscFunctionBegin;
973   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
974   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
975   PetscFunctionReturn(0);
976 }
977 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   Returns one locally owned global row as column indices and/or values.
   The row is assembled by merging the row of the diagonal block (A, local
   column indices) with the row of the off-diagonal block (B, indices mapped
   through garray), keeping global column numbers increasing.
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  /* only one row may be "gotten" at a time; MatRestoreRow clears the flag */
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  /* request only the pieces (values/indices) the caller actually wants */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        /* B entries whose global block column precedes the diagonal range */
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        /* then all of A, then the remaining (right-of-diagonal) B entries */
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* split point already found while copying values */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          /* values were not requested: find the split point now */
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        /* A columns are local: offset by the first global block column */
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1061 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1062 {
1063   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1064 
1065   PetscFunctionBegin;
1066   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1067   baij->getrowactive = PETSC_FALSE;
1068   PetscFunctionReturn(0);
1069 }
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1073 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1074 {
1075   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1076   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1077 
1078   PetscFunctionBegin;
1079   aA->getrow_utriangular = PETSC_TRUE;
1080   PetscFunctionReturn(0);
1081 }
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1084 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1085 {
1086   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1087   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1088 
1089   PetscFunctionBegin;
1090   aA->getrow_utriangular = PETSC_FALSE;
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1096 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1097 {
1098   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1099   PetscErrorCode ierr;
1100 
1101   PetscFunctionBegin;
1102   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1103   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1109 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1110 {
1111   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1112   PetscErrorCode ierr;
1113 
1114   PetscFunctionBegin;
1115   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1116   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1117   PetscFunctionReturn(0);
1118 }
1119 
1120 #undef __FUNCT__
1121 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1122 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1123 {
1124   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1125   PetscErrorCode ierr;
1126 
1127   PetscFunctionBegin;
1128   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1129   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1130   PetscFunctionReturn(0);
1131 }
1132 
#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
/*
   Gathers matrix statistics (nonzeros, memory, mallocs) by summing the two
   local blocks; for MAT_GLOBAL_MAX/MAT_GLOBAL_SUM the per-process totals are
   combined across the communicator with an MPI_Allreduce.
*/
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  /* stats of the diagonal block ... */
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  /* ... plus stats of the off-diagonal block (info is overwritten, so the
     diagonal-block numbers were saved into isend first) */
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
1182 
#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
/*
   Handles Mat options for the parallel SBAIJ type: structural options are
   forwarded to the local blocks, communication/assembly options are stored
   in the Mat_MPISBAIJ context, and SBAIJ-specific triangular options are
   stored on the sequential diagonal block.
*/
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  /* pattern/allocation options apply to both local blocks */
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    /* recorded here for MatSetValues and forwarded to the blocks */
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    /* skip stashing of off-process entries during assembly */
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    /* switch the mult kernel to the conjugate-transpose variant */
    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    /* SPD implies (structural) symmetry */
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    /* SBAIJ storage is symmetric by construction; turning this off is invalid */
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    /* NOTE(review): writes the same field as MAT_IGNORE_LOWER_TRIANGULAR;
       looks like it may have been intended to set a distinct error flag --
       confirm against the SeqSBAIJ implementation */
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1259 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1260 {
1261   PetscErrorCode ierr;
1262 
1263   PetscFunctionBegin;
1264   if (MAT_INITIAL_MATRIX || *B != A) {
1265     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
/*
   mat = diag(ll) * mat * diag(rr).  Symmetric storage requires ll == rr
   (element-wise), so the same vector scales rows and columns: the diagonal
   block is scaled on both sides, and the off-diagonal block is scaled on
   the left locally and on the right with the scattered ghost values.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  /* distinct Vec objects are allowed only if their entries are equal */
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  /* start fetching ghost values of rr; overlap with the local scaling below */
  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1309 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1310 {
1311   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatEqual_MPISBAIJ"
1323 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1324 {
1325   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1326   Mat            a,b,c,d;
1327   PetscBool      flg;
1328   PetscErrorCode ierr;
1329 
1330   PetscFunctionBegin;
1331   a = matA->A; b = matA->B;
1332   c = matB->A; d = matB->B;
1333 
1334   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1335   if (flg) {
1336     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1337   }
1338   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
/*
   B = A.  When both matrices share the same nonzero pattern and the same
   copy implementation, the local blocks are copied directly; otherwise a
   generic entry-by-entry copy is used, bracketed by the upper-triangular
   MatGetRow permission required for SBAIJ storage.
*/
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    /* fast path: copy the local blocks directly */
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1362 
#undef __FUNCT__
#define __FUNCT__ "MatSetUp_MPISBAIJ"
/* Default setup when the user never called a preallocation routine:
   preallocate with default nonzero estimates and the current block size. */
PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1373 
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
/*
   Y = Y + a*X.  With SAME_NONZERO_PATTERN the nonzero arrays of the local
   blocks line up element-for-element, so a single BLAS axpy per block
   suffices; otherwise fall back to the generic implementation, which needs
   the upper-triangular MatGetRow permission on X.
*/
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    /* diagonal blocks: ya->a += alpha * xa->a over all stored nonzeros */
    xa   = (Mat_SeqSBAIJ*)xx->A->data;
    ya   = (Mat_SeqSBAIJ*)yy->A->data;
    ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
    /* off-diagonal blocks likewise */
    xb   = (Mat_SeqBAIJ*)xx->B->data;
    yb   = (Mat_SeqBAIJ*)yy->B->data;
    ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
    PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1405 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1406 {
1407   PetscErrorCode ierr;
1408   PetscInt       i;
1409   PetscBool      flg;
1410 
1411   PetscFunctionBegin;
1412   for (i=0; i<n; i++) {
1413     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1414     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1415   }
1416   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 
1421 /* -------------------------------------------------------------------*/
1422 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1423                                        MatGetRow_MPISBAIJ,
1424                                        MatRestoreRow_MPISBAIJ,
1425                                        MatMult_MPISBAIJ,
1426                                /*  4*/ MatMultAdd_MPISBAIJ,
1427                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1428                                        MatMultAdd_MPISBAIJ,
1429                                        0,
1430                                        0,
1431                                        0,
1432                                /* 10*/ 0,
1433                                        0,
1434                                        0,
1435                                        MatSOR_MPISBAIJ,
1436                                        MatTranspose_MPISBAIJ,
1437                                /* 15*/ MatGetInfo_MPISBAIJ,
1438                                        MatEqual_MPISBAIJ,
1439                                        MatGetDiagonal_MPISBAIJ,
1440                                        MatDiagonalScale_MPISBAIJ,
1441                                        MatNorm_MPISBAIJ,
1442                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1443                                        MatAssemblyEnd_MPISBAIJ,
1444                                        MatSetOption_MPISBAIJ,
1445                                        MatZeroEntries_MPISBAIJ,
1446                                /* 24*/ 0,
1447                                        0,
1448                                        0,
1449                                        0,
1450                                        0,
1451                                /* 29*/ MatSetUp_MPISBAIJ,
1452                                        0,
1453                                        0,
1454                                        0,
1455                                        0,
1456                                /* 34*/ MatDuplicate_MPISBAIJ,
1457                                        0,
1458                                        0,
1459                                        0,
1460                                        0,
1461                                /* 39*/ MatAXPY_MPISBAIJ,
1462                                        MatGetSubMatrices_MPISBAIJ,
1463                                        MatIncreaseOverlap_MPISBAIJ,
1464                                        MatGetValues_MPISBAIJ,
1465                                        MatCopy_MPISBAIJ,
1466                                /* 44*/ 0,
1467                                        MatScale_MPISBAIJ,
1468                                        0,
1469                                        0,
1470                                        0,
1471                                /* 49*/ 0,
1472                                        0,
1473                                        0,
1474                                        0,
1475                                        0,
1476                                /* 54*/ 0,
1477                                        0,
1478                                        MatSetUnfactored_MPISBAIJ,
1479                                        0,
1480                                        MatSetValuesBlocked_MPISBAIJ,
1481                                /* 59*/ 0,
1482                                        0,
1483                                        0,
1484                                        0,
1485                                        0,
1486                                /* 64*/ 0,
1487                                        0,
1488                                        0,
1489                                        0,
1490                                        0,
1491                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1492                                        0,
1493                                        0,
1494                                        0,
1495                                        0,
1496                                /* 74*/ 0,
1497                                        0,
1498                                        0,
1499                                        0,
1500                                        0,
1501                                /* 79*/ 0,
1502                                        0,
1503                                        0,
1504                                        0,
1505                                        MatLoad_MPISBAIJ,
1506                                /* 84*/ 0,
1507                                        0,
1508                                        0,
1509                                        0,
1510                                        0,
1511                                /* 89*/ 0,
1512                                        0,
1513                                        0,
1514                                        0,
1515                                        0,
1516                                /* 94*/ 0,
1517                                        0,
1518                                        0,
1519                                        0,
1520                                        0,
1521                                /* 99*/ 0,
1522                                        0,
1523                                        0,
1524                                        0,
1525                                        0,
1526                                /*104*/ 0,
1527                                        MatRealPart_MPISBAIJ,
1528                                        MatImaginaryPart_MPISBAIJ,
1529                                        MatGetRowUpperTriangular_MPISBAIJ,
1530                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1531                                /*109*/ 0,
1532                                        0,
1533                                        0,
1534                                        0,
1535                                        0,
1536                                /*114*/ 0,
1537                                        0,
1538                                        0,
1539                                        0,
1540                                        0,
1541                                /*119*/ 0,
1542                                        0,
1543                                        0,
1544                                        0,
1545                                        0,
1546                                /*124*/ 0,
1547                                        0,
1548                                        0,
1549                                        0,
1550                                        0,
1551                                /*129*/ 0,
1552                                        0,
1553                                        0,
1554                                        0,
1555                                        0,
1556                                /*134*/ 0,
1557                                        0,
1558                                        0,
1559                                        0,
1560                                        0,
1561                                /*139*/ 0,
1562                                        0,
1563                                        0
1564 };
1565 
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1569 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1570 {
1571   PetscFunctionBegin;
1572   *a = ((Mat_MPISBAIJ*)A->data)->A;
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1578 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1579 {
1580   Mat_MPISBAIJ   *b;
1581   PetscErrorCode ierr;
1582   PetscInt       i,mbs,Mbs;
1583 
1584   PetscFunctionBegin;
1585   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1586   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1587   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1588   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1589   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1590 
1591   b   = (Mat_MPISBAIJ*)B->data;
1592   mbs = B->rmap->n/bs;
1593   Mbs = B->rmap->N/bs;
1594   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1595 
1596   B->rmap->bs = bs;
1597   b->bs2      = bs*bs;
1598   b->mbs      = mbs;
1599   b->nbs      = mbs;
1600   b->Mbs      = Mbs;
1601   b->Nbs      = Mbs;
1602 
1603   for (i=0; i<=b->size; i++) {
1604     b->rangebs[i] = B->rmap->range[i]/bs;
1605   }
1606   b->rstartbs = B->rmap->rstart/bs;
1607   b->rendbs   = B->rmap->rend/bs;
1608 
1609   b->cstartbs = B->cmap->rstart/bs;
1610   b->cendbs   = B->cmap->rend/bs;
1611 
1612   if (!B->preallocated) {
1613     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1614     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1615     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1616     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1617     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1618     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1619     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1620     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1621     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1622   }
1623 
1624   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1625   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1626 
1627   B->preallocated = PETSC_TRUE;
1628   PetscFunctionReturn(0);
1629 }
1630 
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
/*
   MatMPISBAIJSetPreallocationCSR_MPISBAIJ - Preallocates and fills an MPISBAIJ
   matrix from local block-CSR input (ii, jj, V), where ii/jj index BLOCK rows
   and global BLOCK columns.

   Input:
     B  - the matrix
     bs - block size (must be positive; the layout may override it below)
     ii - block-row pointer array, length (local block rows)+1, ii[0] must be 0
     jj - global block-column indices for block row i at jj[ii[i]..ii[i+1]-1]
     V  - optional values: bs*bs scalars per block in jj order; if NULL, zero
          blocks are inserted instead
*/
PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
{
  PetscInt       m,rstart,cstart,cend;
  PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
  const PetscInt *JJ    =0;
  PetscScalar    *values=0;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
  /* Finalize the layouts so local sizes/ownership ranges are available; bs may
     have been adjusted by the layout, so read the final value back. */
  ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
  ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
  ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
  m      = B->rmap->n/bs;        /* local block rows */
  rstart = B->rmap->rstart/bs;   /* first global block row owned here */
  cstart = B->cmap->rstart/bs;   /* [cstart,cend) = block columns of the diagonal part */
  cend   = B->cmap->rend/bs;

  if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
  /* Count, per block row, how many blocks land in the diagonal part (d_nnz)
     versus the off-diagonal part (o_nnz).  Column indices within a row are
     assumed sorted ascending — TODO confirm against callers. */
  ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
  for (i=0; i<m; i++) {
    nz = ii[i+1] - ii[i];
    if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
    nz_max = PetscMax(nz_max,nz);  /* widest row, used to size the zero buffer below */
    JJ     = jj + ii[i];
    /* skip columns strictly left of the diagonal part */
    for (j=0; j<nz; j++) {
      if (*JJ >= cstart) break;
      JJ++;
    }
    d = 0;
    /* count columns inside [cstart,cend) */
    for (; j<nz; j++) {
      if (*JJ++ >= cend) break;
      d++;
    }
    d_nnz[i] = d;
    o_nnz[i] = nz - d;   /* everything left of cstart or right of cend-1 */
  }
  ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);

  /* When no values were supplied, insert zeros from a single scratch buffer
     large enough for the widest row. */
  values = (PetscScalar*)V;
  if (!values) {
    ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
    ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
  }
  for (i=0; i<m; i++) {
    PetscInt          row    = i + rstart;
    PetscInt          ncols  = ii[i+1] - ii[i];
    const PetscInt    *icols = jj + ii[i];
    /* offset into V only when real values exist; the zero buffer is reused at offset 0 */
    const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
    ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
  }

  if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
  ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  /* Structure is now fixed: further insertions at new locations are errors */
  ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1694 
1695 #if defined(PETSC_HAVE_MUMPS)
1696 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1697 #endif
1698 #if defined(PETSC_HAVE_PASTIX)
1699 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1700 #endif
1701 
1702 /*MC
1703    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1704    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1705    the matrix is stored.
1706 
1707   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1708   can call MatSetOption(Mat, MAT_HERMITIAN);
1709 
1710    Options Database Keys:
1711 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1712 
1713   Level: beginner
1714 
1715 .seealso: MatCreateMPISBAIJ
1716 M*/
1717 
1718 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1719 
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   MatCreate_MPISBAIJ - Type constructor for MATMPISBAIJ: allocates the
   per-matrix data structure, installs the function table, initializes all
   cached state to empty, registers the type-specific methods via
   PetscObjectComposeFunction(), and marks the matrix symmetric.
*/
PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscBool      flg;

  PetscFunctionBegin;
  ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  /* start from the shared MPISBAIJ function table, then override a few slots */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy = MatDestroy_MPISBAIJ;
  B->ops->view    = MatView_MPISBAIJ;
  B->assembled    = PETSC_FALSE;
  B->insertmode   = NOT_SET_VALUES;

  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships */
  ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);

  b->donotstash  = PETSC_FALSE;
  b->colmap      = NULL;
  b->garray      = NULL;
  b->roworiented = PETSC_TRUE;

  /* stuff used in block assembly */
  b->barray = 0;

  /* stuff used for matrix vector multiply */
  b->lvec    = 0;
  b->Mvctx   = 0;
  b->slvec0  = 0;
  b->slvec0b = 0;
  b->slvec1  = 0;
  b->slvec1a = 0;
  b->slvec1b = 0;
  b->sMvctx  = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
  b->ijonly = PETSC_FALSE;

  b->in_loc = 0;
  b->v_loc  = 0;
  b->n_loc  = 0;
  ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
  ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
    /* NOTE(review): re-reads the SAME option name to pick up an optional numeric
       value for the factor — presumably intentional (value-carrying flag); confirm */
    ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
    if (fact <= 1.0) fact = 1.39;  /* factors <= 1 are rejected; fall back to the default */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
  }
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  /* register type-specific implementations discoverable by name at runtime */
#if defined(PETSC_HAVE_PASTIX)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
#endif
#if defined(PETSC_HAVE_MUMPS)
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
#endif
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);

  /* SBAIJ matrices are symmetric by construction */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;

  ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1818 
1819 /*MC
1820    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1821 
1822    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1823    and MATMPISBAIJ otherwise.
1824 
1825    Options Database Keys:
1826 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1827 
1828   Level: beginner
1829 
1830 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1831 M*/
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1835 /*@C
1836    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1837    the user should preallocate the matrix storage by setting the parameters
1838    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1839    performance can be increased by more than a factor of 50.
1840 
1841    Collective on Mat
1842 
1843    Input Parameters:
1844 +  A - the matrix
.  bs   - size of block
1846 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1847            submatrix  (same for all local rows)
1848 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
1850            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1851            for the diagonal entry and set a value even if it is zero.
1852 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1853            submatrix (same for all local rows).
1854 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1855            off-diagonal portion of the local submatrix that is right of the diagonal
1856            (possibly different for each block row) or NULL.
1857 
1858 
1859    Options Database Keys:
1860 .   -mat_no_unroll - uses code that does not unroll the loops in the
1861                      block calculations (much slower)
1862 .   -mat_block_size - size of the blocks to use
1863 
1864    Notes:
1865 
1866    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1868 
1869    If the *_nnz parameter is given then the *_nz parameter is ignored
1870 
1871    Storage Information:
1872    For a square global matrix we define each processor's diagonal portion
1873    to be its local rows and the corresponding columns (a square submatrix);
1874    each processor's off-diagonal portion encompasses the remainder of the
1875    local matrix (a rectangular submatrix).
1876 
1877    The user can specify preallocated storage for the diagonal part of
1878    the local submatrix with either d_nz or d_nnz (not both).  Set
1879    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1880    memory allocation.  Likewise, specify preallocated storage for the
1881    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1882 
1883    You can call MatGetInfo() to get information on how effective the preallocation was;
1884    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1885    You can also run with the option -info and look for messages with the string
1886    malloc in them to see if additional memory allocation was needed.
1887 
1888    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1889    the figure below we depict these three local rows and all columns (0-11).
1890 
1891 .vb
1892            0 1 2 3 4 5 6 7 8 9 10 11
1893           --------------------------
1894    row 3  |. . . d d d o o o o  o  o
1895    row 4  |. . . d d d o o o o  o  o
1896    row 5  |. . . d d d o o o o  o  o
1897           --------------------------
1898 .ve
1899 
1900    Thus, any entries in the d locations are stored in the d (diagonal)
1901    submatrix, and any entries in the o locations are stored in the
1902    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1903    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1904 
1905    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1906    plus the diagonal part of the d matrix,
1907    and o_nz should indicate the number of block nonzeros per row in the o matrix
1908 
1909    In general, for PDE problems in which most nonzeros are near the diagonal,
1910    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1911    or you will get TERRIBLE performance; see the users' manual chapter on
1912    matrices.
1913 
1914    Level: intermediate
1915 
1916 .keywords: matrix, block, aij, compressed row, sparse, parallel
1917 
1918 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1919 @*/
PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(B,MAT_CLASSID,1);
  PetscValidType(B,1);
  PetscValidLogicalCollectiveInt(B,bs,2);
  /* Dispatch to the type-specific implementation registered under this name;
     PetscTryMethod is a no-op when the matrix type provides none. */
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1931 
1932 #undef __FUNCT__
1933 #define __FUNCT__ "MatCreateSBAIJ"
1934 /*@C
1935    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1936    (block compressed row).  For good matrix assembly performance
1937    the user should preallocate the matrix storage by setting the parameters
1938    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1939    performance can be increased by more than a factor of 50.
1940 
1941    Collective on MPI_Comm
1942 
1943    Input Parameters:
1944 +  comm - MPI communicator
.  bs   - size of block
1946 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1947            This value should be the same as the local size used in creating the
1948            y vector for the matrix-vector product y = Ax.
1949 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1950            This value should be the same as the local size used in creating the
1951            x vector for the matrix-vector product y = Ax.
1952 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1953 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1954 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1955            submatrix  (same for all local rows)
1956 .  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           (possibly different for each block row) or NULL.
1959            If you plan to factor the matrix you must leave room for the diagonal entry and
1960            set its value even if it is zero.
1961 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1962            submatrix (same for all local rows).
1963 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1964            off-diagonal portion of the local submatrix (possibly different for
1965            each block row) or NULL.
1966 
1967    Output Parameter:
1968 .  A - the matrix
1969 
1970    Options Database Keys:
1971 .   -mat_no_unroll - uses code that does not unroll the loops in the
1972                      block calculations (much slower)
1973 .   -mat_block_size - size of the blocks to use
1974 .   -mat_mpi - use the parallel matrix data structures even on one processor
1975                (defaults to using SeqBAIJ format on one processor)
1976 
1977    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly.
1979    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1980 
1981    Notes:
1982    The number of rows and columns must be divisible by blocksize.
1983    This matrix type does not support complex Hermitian operation.
1984 
1985    The user MUST specify either the local or global matrix dimensions
1986    (possibly both).
1987 
1988    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1990 
1991    If the *_nnz parameter is given then the *_nz parameter is ignored
1992 
1993    Storage Information:
1994    For a square global matrix we define each processor's diagonal portion
1995    to be its local rows and the corresponding columns (a square submatrix);
1996    each processor's off-diagonal portion encompasses the remainder of the
1997    local matrix (a rectangular submatrix).
1998 
1999    The user can specify preallocated storage for the diagonal part of
2000    the local submatrix with either d_nz or d_nnz (not both).  Set
2001    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2002    memory allocation.  Likewise, specify preallocated storage for the
2003    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2004 
2005    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2006    the figure below we depict these three local rows and all columns (0-11).
2007 
2008 .vb
2009            0 1 2 3 4 5 6 7 8 9 10 11
2010           --------------------------
2011    row 3  |. . . d d d o o o o  o  o
2012    row 4  |. . . d d d o o o o  o  o
2013    row 5  |. . . d d d o o o o  o  o
2014           --------------------------
2015 .ve
2016 
2017    Thus, any entries in the d locations are stored in the d (diagonal)
2018    submatrix, and any entries in the o locations are stored in the
2019    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2020    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2021 
2022    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2023    plus the diagonal part of the d matrix,
2024    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2025    In general, for PDE problems in which most nonzeros are near the diagonal,
2026    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2027    or you will get TERRIBLE performance; see the users' manual chapter on
2028    matrices.
2029 
2030    Level: intermediate
2031 
2032 .keywords: matrix, block, aij, compressed row, sparse, parallel
2033 
2034 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2035 @*/
2036 
2037 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2038 {
2039   PetscErrorCode ierr;
2040   PetscMPIInt    size;
2041 
2042   PetscFunctionBegin;
2043   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2044   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2045   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2046   if (size > 1) {
2047     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2048     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2049   } else {
2050     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2051     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2052   }
2053   PetscFunctionReturn(0);
2054 }
2055 
2056 
#undef __FUNCT__
#define __FUNCT__ "MatDuplicate_MPISBAIJ"
/*
   MatDuplicate_MPISBAIJ - Creates a new MPISBAIJ matrix with the same layout
   as matin, deep-copying its metadata, communication vectors, and (per
   cpvalues) the local A/B blocks.  The scatter context sMvctx is shared by
   reference rather than copied (see note below).
*/
static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
{
  Mat            mat;
  Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
  PetscScalar    *array;

  PetscFunctionBegin;
  *newmat = 0;

  /* create the shell of the new matrix with identical sizes/type/layouts */
  ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
  ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
  ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
  ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
  ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);

  mat->factortype   = matin->factortype;
  mat->preallocated = PETSC_TRUE;
  mat->assembled    = PETSC_TRUE;
  mat->insertmode   = NOT_SET_VALUES;

  /* copy scalar metadata (block counts, ownership, flags) */
  a      = (Mat_MPISBAIJ*)mat->data;
  a->bs2 = oldmat->bs2;
  a->mbs = oldmat->mbs;
  a->nbs = oldmat->nbs;
  a->Mbs = oldmat->Mbs;
  a->Nbs = oldmat->Nbs;


  a->size         = oldmat->size;
  a->rank         = oldmat->rank;
  a->donotstash   = oldmat->donotstash;
  a->roworiented  = oldmat->roworiented;
  a->rowindices   = 0;
  a->rowvalues    = 0;
  a->getrowactive = PETSC_FALSE;
  a->barray       = 0;
  a->rstartbs     = oldmat->rstartbs;
  a->rendbs       = oldmat->rendbs;
  a->cstartbs     = oldmat->cstartbs;
  a->cendbs       = oldmat->cendbs;

  /* hash table stuff: counters start fresh, only the configuration is copied */
  a->ht           = 0;
  a->hd           = 0;
  a->ht_size      = 0;
  a->ht_flag      = oldmat->ht_flag;
  a->ht_fact      = oldmat->ht_fact;
  a->ht_total_ct  = 0;
  a->ht_insert_ct = 0;

  ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
  /* colmap: deep copy either the table or the flat array variant */
  if (oldmat->colmap) {
#if defined(PETSC_USE_CTABLE)
    ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
#else
    ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
#endif
  } else a->colmap = 0;

  /* garray: global block-column indices of the off-diagonal part */
  if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
    ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
    ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
    ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
  } else a->garray = 0;

  ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
  ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
  ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);

  ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);

  /* slvec1a/slvec1b (and slvec0b) are views aliasing segments of the slvec1
     (resp. slvec0) storage: [0,bs*mbs) is the local part, the tail the ghost part */
  ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
  ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
  ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  /* NOTE(review): slvec0 and slvec1 are logged as children twice (also above) —
     looks redundant; confirm whether PetscLogObjectParent tolerates duplicates */
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);

  /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
  ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
  a->sMvctx = oldmat->sMvctx;
  ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);

  /* finally duplicate the two local blocks, honoring cpvalues */
  ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
  ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
  ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
  ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
  *newmat = mat;
  PetscFunctionReturn(0);
}
2167 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
/*
   MatLoad_MPISBAIJ - Loads an MPISBAIJ matrix from a PETSc binary viewer.

   Process 0 reads the header, the per-row lengths, the column indices and the
   numerical values from the binary file and ships each remote process its
   portion (row lengths via MPI_Scatterv, indices/values via MPI_Send/MPI_Recv);
   every process then counts its diagonal/off-diagonal block columns,
   preallocates, and inserts its rows.

   If the global row count is not divisible by the block size (settable with
   -matload_block_size) the matrix is padded with extra rows, each holding a
   single unit diagonal entry, so the dimension becomes a multiple of bs.
*/
PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
  PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs       =1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
  /* let the user choose the block size for the loaded matrix */
  ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
  ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
  ierr = PetscOptionsEnd();CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  if (!rank) {
    /* process 0 reads and validates the file header (classid, M, N, nz) */
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
  }

  /* sizesset == 0 means the caller supplied neither local nor global sizes */
  if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;

  /* broadcast M, N, nz from the header read on process 0 */
  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M    = header[1];
  N    = header[2];

  /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
  if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
  if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;

  /* If global sizes are set, check if they are consistent with that given in the file */
  if (sizesset) {
    ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
  }
  if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
  if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);

  if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  /* extra_rows == bs means M was already divisible by bs: no padding needed */
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
  }

  /* determine ownership of all rows */
  if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
    mbs = Mbs/size + ((Mbs % size) > rank);
    m   = mbs*bs;
  } else { /* User Set */
    m   = newmat->rmap->n;
    mbs = m/bs;
  }
  /* rowners[] = prefix sum of per-process block-row counts; browners[] is the
     same boundary list in point (scalar) rows */
  ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
  ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
  ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
  /* rstart/rend are this process's ownership range in BLOCK rows */
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* each padded row holds exactly one entry (its unit diagonal) */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    /* non-root ranks pass dummy send arguments to the collective scatter */
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* uniprocessor case: process 0 also owns the padded rows, which are not on disk */
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1) {
      /* column index of each padded row's diagonal entry */
      for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
    }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) nz += locrowlens[i];
    ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    /* sanity check: the message length must match the sum of my row lengths */
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries */
  /* mask[] flags block columns already seen in the current block row;
     masked1[]/masked2[] remember which flags to clear afterwards */
  ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
  ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  rowcount = 0;
  nzcount  = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          /* rstart/rend are in block units, matching tmp */
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }
  if (!sizesset) {
    ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
  }
  ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  /* the file stores the full matrix; ignore entries below the diagonal
     instead of flagging them as errors during insertion */
  ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz     = procsnz[0];
    vals   = buf;
    mycols = ibuf;
    if (size == 1) nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    if (size == 1) {
      /* unit diagonal for the padded rows */
      for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
    }

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      /* value messages use the matrix's tag, distinguishing them from the
         index messages sent above with the viewer's tag */
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc (note: i == size-1 here, left over from the loop above) */
    if (size != 1) {
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
  ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
  ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2422 
2423 #undef __FUNCT__
2424 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2425 /*XXXXX@
2426    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2427 
2428    Input Parameters:
2429 .  mat  - the matrix
.  fact - the factor used to determine the size of the hash table
2431 
2432    Not Collective on Mat, each process can have a different hash factor
2433 
2434    Level: advanced
2435 
2436   Notes:
2437    This can also be set by the command line option: -mat_use_hash_table fact
2438 
2439 .keywords: matrix, hashtable, factor, HT
2440 
2441 .seealso: MatSetOption()
2442 @XXXXX*/
2443 
2444 
#undef __FUNCT__
#define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
/*
   MatGetRowMaxAbs_MPISBAIJ - Computes v[i] = max_j |A(i,j)| for each local row i.

   Only the upper triangle is stored, so each entry of the off-diagonal block B
   contributes twice: to the maximum of its own row (updated in va[]) and to
   the maximum of the symmetric row owned by a later process (accumulated in
   work[] and shipped to the owning process with point-to-point messages).

   The idx[] output (column location of each maximum) is not implemented.
*/
PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
  PetscReal      atmp;
  PetscReal      *work,*svalues,*rvalues;
  PetscErrorCode ierr;
  PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
  PetscMPIInt    rank,size;
  PetscInt       *rowners_bs,dest,count,source;
  PetscScalar    *va;
  MatScalar      *ba;
  MPI_Status     stat;

  PetscFunctionBegin;
  if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
  /* row maxima of the diagonal block; B's contributions are merged in below */
  ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
  ierr = VecGetArray(v,&va);CHKERRQ(ierr);

  ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);

  bs  = A->rmap->bs;
  mbs = a->mbs;
  Mbs = a->Mbs;
  ba  = b->a;       /* raw CSR-of-blocks storage of the off-diagonal block B */
  bi  = b->i;
  bj  = b->j;

  /* find ownerships */
  rowners_bs = A->rmap->range;

  /* each proc creates an array to be distributed */
  /* work[] accumulates column maxima over ALL global columns; only the slices
     owned by later processes are actually sent */
  ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
  ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);

  /* row_max for B */
  /* the last process is skipped: presumably its upper-triangular off-diagonal
     block is empty, having no columns to its right */
  if (rank != size-1) {
    for (i=0; i<mbs; i++) {
      ncols = bi[1] - bi[0]; bi++;
      brow  = bs*i;
      for (j=0; j<ncols; j++) {
        bcol = bs*(*bj);
        for (kcol=0; kcol<bs; kcol++) {
          col  = bcol + kcol;                /* local col index */
          /* NOTE(review): maps B's local column directly to the global range
             starting at rowners_bs[rank+1]; this presumes B's columns are
             exactly the contiguous columns owned by later processes -- confirm
             against the garray-based column mapping */
          col += rowners_bs[rank+1];      /* global col index */
          for (krow=0; krow<bs; krow++) {
            atmp = PetscAbsScalar(*ba); ba++;
            row  = brow + krow;   /* local row index */
            if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
            if (work[col] < atmp) work[col] = atmp;
          }
        }
        bj++;
      }
    }

    /* send values to its owners */
    for (dest=rank+1; dest<size; dest++) {
      svalues = work + rowners_bs[dest];
      count   = rowners_bs[dest+1]-rowners_bs[dest];
      ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
    }
  }

  /* receive values */
  /* each of the `rank` earlier processes sends one message of `count` column
     maxima; they are max-combined into va[] one message at a time */
  if (rank) {
    rvalues = work;
    count   = rowners_bs[rank+1]-rowners_bs[rank];
    for (source=0; source<rank; source++) {
      ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
      /* process values */
      for (i=0; i<count; i++) {
        if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
      }
    }
  }

  ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
  ierr = PetscFree(work);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
2529 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ"
/*
   MatSOR_MPISBAIJ - (S)SOR relaxation for MPISBAIJ matrices, block size 1 only.

   Supported MatSORType combinations (others error out):
     - SOR_APPLY_UPPER
     - SOR_LOCAL_SYMMETRIC_SWEEP (optionally with SOR_ZERO_INITIAL_GUESS)
     - SOR_LOCAL_FORWARD_SWEEP / SOR_LOCAL_BACKWARD_SWEEP with
       SOR_ZERO_INITIAL_GUESS and its == 1
     - SOR_EISENSTAT

   The work vectors slvec0/slvec1 are laid out as [local part | ghost part]:
   the first bs*mbs entries alias slvec0a/slvec1a and the tail aliases
   slvec0b/slvec1b (see the slvec setup earlier in this file).
*/
PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode    ierr;
  PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
  PetscScalar       *x,*ptr,*from;
  Vec               bb1;
  const PetscScalar *b;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if (flag == SOR_APPLY_UPPER) {
    /* delegate entirely to the diagonal block's SOR */
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
    PetscFunctionReturn(0);
  }

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no off-process contribution when xx starts at 0 */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {

      /* lower triangular part: slvec0b = - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);

      /* copy xx into slvec0a */
      ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);

      /* negate the whole of slvec0 (both the copied xx and B^T*xx parts) */
      ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);

      /* copy bb into slvec1a */
      ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
      ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);

      /* set slvec1b = 0 */
      ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);

      /* scatter-add slvec0 into slvec1; arrays are restored between Begin/End
         so the restore overlaps with the communication */
      ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*x */
      ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    /* lazily created work vectors, cached on the matrix */
    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong for same matrix with new nonzero values */
      ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* NOTE(review): the diagonal-block product is written to bb1, yet the
         following VecAYPX updates mat->slvec1a and bb1 is not read afterwards;
         compare the non-hasop branch, which writes into slvec1a directly --
         verify this path */
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          These two lines are replaced by code that may be a bit faster for a good compiler
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        /* scale == -1 when omega == 1, so the multiply by scale is folded away */
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    /* pack xx into the local (slvec0a) half of slvec0 */
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2663 
#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
/*
   MatSOR_MPISBAIJ_2comm - SSOR for MPISBAIJ using the two-scatter (Mvctx/lvec)
   communication scheme instead of the combined sMvctx scheme used by
   MatSOR_MPISBAIJ.

   Only SOR_LOCAL_SYMMETRIC_SWEEP (optionally with SOR_ZERO_INITIAL_GUESS) is
   supported, and only for block size 1.
*/
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      /* first iteration needs no off-process contribution when xx starts at 0 */
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      /* forward scatter of xx into the ghost vector; completed further below
         so the local B^T product overlaps with communication */
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower diagonal part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      /* reverse scatter accumulates -B^T*xx into the owning processes' bb1 */
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper diagonal part: bb1 = bb1 - B*x */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
2709 
2710 #undef __FUNCT__
2711 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2712 /*@
2713      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2714          CSR format the local rows.
2715 
2716    Collective on MPI_Comm
2717 
2718    Input Parameters:
2719 +  comm - MPI communicator
2720 .  bs - the block size, only a block size of 1 is supported
2721 .  m - number of local rows (Cannot be PETSC_DECIDE)
2722 .  n - This value should be the same as the local size used in creating the
2723        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2724        calculated if N is given) For square matrices n is almost always m.
2725 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2726 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2727 .   i - row indices
2728 .   j - column indices
2729 -   a - matrix values
2730 
2731    Output Parameter:
2732 .   mat - the matrix
2733 
2734    Level: intermediate
2735 
2736    Notes:
2737        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2738      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2739      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2740 
2741        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2742 
2743 .keywords: matrix, aij, compressed row, sparse, parallel
2744 
2745 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2746           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2747 @*/
2748 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2749 {
2750   PetscErrorCode ierr;
2751 
2752 
2753   PetscFunctionBegin;
2754   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2755   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2756   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2757   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2758   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2759   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2760   PetscFunctionReturn(0);
2761 }
2762 
2763 
2764 #undef __FUNCT__
2765 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2766 /*@C
2767    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2768    (the default parallel PETSc format).
2769 
2770    Collective on MPI_Comm
2771 
2772    Input Parameters:
2773 +  A - the matrix
2774 .  bs - the block size
2775 .  i - the indices into j for the start of each local row (starts with zero)
2776 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2777 -  v - optional values in the matrix
2778 
2779    Level: developer
2780 
2781 .keywords: matrix, aij, compressed row, sparse, parallel
2782 
2783 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2784 @*/
2785 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2786 {
2787   PetscErrorCode ierr;
2788 
2789   PetscFunctionBegin;
2790   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2791   PetscFunctionReturn(0);
2792 }
2793 
2794 
2795