xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision eb975fb2d04d974246739c9c552b1cfae0c3a9e4)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 #undef __FUNCT__
24 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
25 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
26 {
27   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
28   PetscErrorCode ierr;
29 
30   PetscFunctionBegin;
31   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
32   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
33   PetscFunctionReturn(0);
34 }
35 
36 #undef __FUNCT__
37 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
38 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
39 {
40   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
41   PetscErrorCode ierr;
42 
43   PetscFunctionBegin;
44   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
45   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
46   PetscFunctionReturn(0);
47 }
48 
49 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
50   { \
51  \
52     brow = row/bs;  \
53     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
54     rmax = aimax[brow]; nrow = ailen[brow]; \
55     bcol = col/bs; \
56     ridx = row % bs; cidx = col % bs; \
57     low  = 0; high = nrow; \
58     while (high-low > 3) { \
59       t = (low+high)/2; \
60       if (rp[t] > bcol) high = t; \
61       else              low  = t; \
62     } \
63     for (_i=low; _i<high; _i++) { \
64       if (rp[_i] > bcol) break; \
65       if (rp[_i] == bcol) { \
66         bap = ap + bs2*_i + bs*cidx + ridx; \
67         if (addv == ADD_VALUES) *bap += value;  \
68         else                    *bap  = value;  \
69         goto a_noinsert; \
70       } \
71     } \
72     if (a->nonew == 1) goto a_noinsert; \
73     if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
74     MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
75     N = nrow++ - 1;  \
76     /* shift up all the later entries in this row */ \
77     for (ii=N; ii>=_i; ii--) { \
78       rp[ii+1] = rp[ii]; \
79       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
80     } \
81     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
82     rp[_i]                      = bcol;  \
83     ap[bs2*_i + bs*cidx + ridx] = value;  \
84 a_noinsert:; \
85     ailen[brow] = nrow; \
86   }
87 
88 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
89   { \
90     brow = row/bs;  \
91     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
92     rmax = bimax[brow]; nrow = bilen[brow]; \
93     bcol = col/bs; \
94     ridx = row % bs; cidx = col % bs; \
95     low  = 0; high = nrow; \
96     while (high-low > 3) { \
97       t = (low+high)/2; \
98       if (rp[t] > bcol) high = t; \
99       else              low  = t; \
100     } \
101     for (_i=low; _i<high; _i++) { \
102       if (rp[_i] > bcol) break; \
103       if (rp[_i] == bcol) { \
104         bap = ap + bs2*_i + bs*cidx + ridx; \
105         if (addv == ADD_VALUES) *bap += value;  \
106         else                    *bap  = value;  \
107         goto b_noinsert; \
108       } \
109     } \
110     if (b->nonew == 1) goto b_noinsert; \
111     if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
112     MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
113     N = nrow++ - 1;  \
114     /* shift up all the later entries in this row */ \
115     for (ii=N; ii>=_i; ii--) { \
116       rp[ii+1] = rp[ii]; \
117       ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
118     } \
119     if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
120     rp[_i]                      = bcol;  \
121     ap[bs2*_i + bs*cidx + ridx] = value;  \
122 b_noinsert:; \
123     bilen[brow] = nrow; \
124   }
125 
126 /* Only add/insert a(i,j) with i<=j (blocks).
127    Any a(i,j) with i>j input by user is ingored.
128 */
129 #undef __FUNCT__
130 #define __FUNCT__ "MatSetValues_MPISBAIJ"
131 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
132 {
133   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
134   MatScalar      value;
135   PetscBool      roworiented = baij->roworiented;
136   PetscErrorCode ierr;
137   PetscInt       i,j,row,col;
138   PetscInt       rstart_orig=mat->rmap->rstart;
139   PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
140   PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;
141 
142   /* Some Variables required in the macro */
143   Mat          A     = baij->A;
144   Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
145   PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
146   MatScalar    *aa   =a->a;
147 
148   Mat         B     = baij->B;
149   Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
150   PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
151   MatScalar   *ba   =b->a;
152 
153   PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
154   PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
155   MatScalar *ap,*bap;
156 
157   /* for stash */
158   PetscInt  n_loc, *in_loc = NULL;
159   MatScalar *v_loc = NULL;
160 
161   PetscFunctionBegin;
162   if (v) PetscValidScalarPointer(v,6);
163   if (!baij->donotstash) {
164     if (n > baij->n_loc) {
165       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
166       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
167       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
168       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
169 
170       baij->n_loc = n;
171     }
172     in_loc = baij->in_loc;
173     v_loc  = baij->v_loc;
174   }
175 
176   for (i=0; i<m; i++) {
177     if (im[i] < 0) continue;
178 #if defined(PETSC_USE_DEBUG)
179     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
180 #endif
181     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
182       row = im[i] - rstart_orig;              /* local row index */
183       for (j=0; j<n; j++) {
184         if (im[i]/bs > in[j]/bs) {
185           if (a->ignore_ltriangular) {
186             continue;    /* ignore lower triangular blocks */
187           } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
188         }
189         if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
190           col  = in[j] - cstart_orig;         /* local col index */
191           brow = row/bs; bcol = col/bs;
192           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
193           if (roworiented) value = v[i*n+j];
194           else             value = v[i+j*m];
195           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
196           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
197         } else if (in[j] < 0) continue;
198 #if defined(PETSC_USE_DEBUG)
199         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
200 #endif
201         else {  /* off-diag entry (B) */
202           if (mat->was_assembled) {
203             if (!baij->colmap) {
204               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
205             }
206 #if defined(PETSC_USE_CTABLE)
207             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
208             col  = col - 1;
209 #else
210             col = baij->colmap[in[j]/bs] - 1;
211 #endif
212             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
213               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
214               col  =  in[j];
215               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
216               B    = baij->B;
217               b    = (Mat_SeqBAIJ*)(B)->data;
218               bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
219               ba   = b->a;
220             } else col += in[j]%bs;
221           } else col = in[j];
222           if (roworiented) value = v[i*n+j];
223           else             value = v[i+j*m];
224           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
225           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
226         }
227       }
228     } else {  /* off processor entry */
229       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
230       if (!baij->donotstash) {
231         mat->assembled = PETSC_FALSE;
232         n_loc          = 0;
233         for (j=0; j<n; j++) {
234           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
235           in_loc[n_loc] = in[j];
236           if (roworiented) {
237             v_loc[n_loc] = v[i*n+j];
238           } else {
239             v_loc[n_loc] = v[j*m+i];
240           }
241           n_loc++;
242         }
243         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
244       }
245     }
246   }
247   PetscFunctionReturn(0);
248 }
249 
250 #undef __FUNCT__
251 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
252 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
253 {
254   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
255   const MatScalar *value;
256   MatScalar       *barray     =baij->barray;
257   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
258   PetscErrorCode  ierr;
259   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
260   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
261   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
262 
263   PetscFunctionBegin;
264   if (!barray) {
265     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
266     baij->barray = barray;
267   }
268 
269   if (roworiented) {
270     stepval = (n-1)*bs;
271   } else {
272     stepval = (m-1)*bs;
273   }
274   for (i=0; i<m; i++) {
275     if (im[i] < 0) continue;
276 #if defined(PETSC_USE_DEBUG)
277     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
278 #endif
279     if (im[i] >= rstart && im[i] < rend) {
280       row = im[i] - rstart;
281       for (j=0; j<n; j++) {
282         if (im[i] > in[j]) {
283           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
284           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
285         }
286         /* If NumCol = 1 then a copy is not required */
287         if ((roworiented) && (n == 1)) {
288           barray = (MatScalar*) v + i*bs2;
289         } else if ((!roworiented) && (m == 1)) {
290           barray = (MatScalar*) v + j*bs2;
291         } else { /* Here a copy is required */
292           if (roworiented) {
293             value = v + i*(stepval+bs)*bs + j*bs;
294           } else {
295             value = v + j*(stepval+bs)*bs + i*bs;
296           }
297           for (ii=0; ii<bs; ii++,value+=stepval) {
298             for (jj=0; jj<bs; jj++) {
299               *barray++ = *value++;
300             }
301           }
302           barray -=bs2;
303         }
304 
305         if (in[j] >= cstart && in[j] < cend) {
306           col  = in[j] - cstart;
307           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
308         } else if (in[j] < 0) continue;
309 #if defined(PETSC_USE_DEBUG)
310         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
311 #endif
312         else {
313           if (mat->was_assembled) {
314             if (!baij->colmap) {
315               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
316             }
317 
318 #if defined(PETSC_USE_DEBUG)
319 #if defined(PETSC_USE_CTABLE)
320             { PetscInt data;
321               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
322               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
323             }
324 #else
325             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
326 #endif
327 #endif
328 #if defined(PETSC_USE_CTABLE)
329             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
330             col  = (col - 1)/bs;
331 #else
332             col = (baij->colmap[in[j]] - 1)/bs;
333 #endif
334             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
335               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
336               col  = in[j];
337             }
338           } else col = in[j];
339           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
340         }
341       }
342     } else {
343       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
344       if (!baij->donotstash) {
345         if (roworiented) {
346           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
347         } else {
348           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
349         }
350       }
351     }
352   }
353   PetscFunctionReturn(0);
354 }
355 
356 #undef __FUNCT__
357 #define __FUNCT__ "MatGetValues_MPISBAIJ"
358 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
359 {
360   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
361   PetscErrorCode ierr;
362   PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
363   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
364 
365   PetscFunctionBegin;
366   for (i=0; i<m; i++) {
367     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
368     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
369     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
370       row = idxm[i] - bsrstart;
371       for (j=0; j<n; j++) {
372         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
373         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
374         if (idxn[j] >= bscstart && idxn[j] < bscend) {
375           col  = idxn[j] - bscstart;
376           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
377         } else {
378           if (!baij->colmap) {
379             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
380           }
381 #if defined(PETSC_USE_CTABLE)
382           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
383           data--;
384 #else
385           data = baij->colmap[idxn[j]/bs]-1;
386 #endif
387           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
388           else {
389             col  = data + idxn[j]%bs;
390             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
391           }
392         }
393       }
394     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
395   }
396   PetscFunctionReturn(0);
397 }
398 
399 #undef __FUNCT__
400 #define __FUNCT__ "MatNorm_MPISBAIJ"
401 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
402 {
403   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
404   PetscErrorCode ierr;
405   PetscReal      sum[2],*lnorm2;
406 
407   PetscFunctionBegin;
408   if (baij->size == 1) {
409     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
410   } else {
411     if (type == NORM_FROBENIUS) {
412       ierr    = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
413       ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
414       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
415       ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
416       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
417       ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
418       *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
419       ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
420     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
421       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
422       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
423       PetscReal    *rsum,*rsum2,vabs;
424       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
425       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
426       MatScalar    *v;
427 
428       ierr = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
429       ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
430       /* Amat */
431       v = amat->a; jj = amat->j;
432       for (brow=0; brow<mbs; brow++) {
433         grow = bs*(rstart + brow);
434         nz   = amat->i[brow+1] - amat->i[brow];
435         for (bcol=0; bcol<nz; bcol++) {
436           gcol = bs*(rstart + *jj); jj++;
437           for (col=0; col<bs; col++) {
438             for (row=0; row<bs; row++) {
439               vabs            = PetscAbsScalar(*v); v++;
440               rsum[gcol+col] += vabs;
441               /* non-diagonal block */
442               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
443             }
444           }
445         }
446       }
447       /* Bmat */
448       v = bmat->a; jj = bmat->j;
449       for (brow=0; brow<mbs; brow++) {
450         grow = bs*(rstart + brow);
451         nz = bmat->i[brow+1] - bmat->i[brow];
452         for (bcol=0; bcol<nz; bcol++) {
453           gcol = bs*garray[*jj]; jj++;
454           for (col=0; col<bs; col++) {
455             for (row=0; row<bs; row++) {
456               vabs            = PetscAbsScalar(*v); v++;
457               rsum[gcol+col] += vabs;
458               rsum[grow+row] += vabs;
459             }
460           }
461         }
462       }
463       ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
464       *norm = 0.0;
465       for (col=0; col<mat->cmap->N; col++) {
466         if (rsum2[col] > *norm) *norm = rsum2[col];
467       }
468       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
469     } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
470   }
471   PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
476 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
477 {
478   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
479   PetscErrorCode ierr;
480   PetscInt       nstash,reallocs;
481   InsertMode     addv;
482 
483   PetscFunctionBegin;
484   if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);
485 
486   /* make sure all processors are either in INSERTMODE or ADDMODE */
487   ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
488   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
489   mat->insertmode = addv; /* in case this processor had no cache */
490 
491   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
492   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
493   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
494   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
495   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
496   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
497   PetscFunctionReturn(0);
498 }
499 
500 #undef __FUNCT__
501 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
502 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
503 {
504   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
505   Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
506   PetscErrorCode ierr;
507   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
508   PetscInt       *row,*col;
509   PetscBool      other_disassembled;
510   PetscMPIInt    n;
511   PetscBool      r1,r2,r3;
512   MatScalar      *val;
513   InsertMode     addv = mat->insertmode;
514 
515   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
516   PetscFunctionBegin;
517   if (!baij->donotstash &&  !mat->nooffprocentries) {
518     while (1) {
519       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
520       if (!flg) break;
521 
522       for (i=0; i<n;) {
523         /* Now identify the consecutive vals belonging to the same row */
524         for (j=i,rstart=row[j]; j<n; j++) {
525           if (row[j] != rstart) break;
526         }
527         if (j < n) ncols = j-i;
528         else       ncols = n-i;
529         /* Now assemble all these values with a single function call */
530         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
531         i    = j;
532       }
533     }
534     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
535     /* Now process the block-stash. Since the values are stashed column-oriented,
536        set the roworiented flag to column oriented, and after MatSetValues()
537        restore the original flags */
538     r1 = baij->roworiented;
539     r2 = a->roworiented;
540     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
541 
542     baij->roworiented = PETSC_FALSE;
543     a->roworiented    = PETSC_FALSE;
544 
545     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworinted */
546     while (1) {
547       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
548       if (!flg) break;
549 
550       for (i=0; i<n;) {
551         /* Now identify the consecutive vals belonging to the same row */
552         for (j=i,rstart=row[j]; j<n; j++) {
553           if (row[j] != rstart) break;
554         }
555         if (j < n) ncols = j-i;
556         else       ncols = n-i;
557         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
558         i    = j;
559       }
560     }
561     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
562 
563     baij->roworiented = r1;
564     a->roworiented    = r2;
565 
566     ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworinted */
567   }
568 
569   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
570   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
571 
572   /* determine if any processor has disassembled, if so we must
573      also disassemble ourselfs, in order that we may reassemble. */
574   /*
575      if nonzero structure of submatrix B cannot change then we know that
576      no processor disassembled thus we can skip this stuff
577   */
578   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
579     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
580     if (mat->was_assembled && !other_disassembled) {
581       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
582     }
583   }
584 
585   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
586     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
587   }
588   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
589   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
590   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
591 
592   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
593 
594   baij->rowvalues = 0;
595   PetscFunctionReturn(0);
596 }
597 
598 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
599 #include <petscdraw.h>
600 #undef __FUNCT__
601 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
602 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
603 {
604   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
605   PetscErrorCode    ierr;
606   PetscInt          bs   = mat->rmap->bs;
607   PetscMPIInt       size = baij->size,rank = baij->rank;
608   PetscBool         iascii,isdraw;
609   PetscViewer       sviewer;
610   PetscViewerFormat format;
611 
612   PetscFunctionBegin;
613   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
614   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
615   if (iascii) {
616     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
617     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
618       MatInfo info;
619       ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
620       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
621       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
622       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
623       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
624       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
625       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
627       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
629       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
630       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
631       PetscFunctionReturn(0);
632     } else if (format == PETSC_VIEWER_ASCII_INFO) {
633       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
634       PetscFunctionReturn(0);
635     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
636       PetscFunctionReturn(0);
637     }
638   }
639 
640   if (isdraw) {
641     PetscDraw draw;
642     PetscBool isnull;
643     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
644     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
645   }
646 
647   if (size == 1) {
648     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
649     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
650   } else {
651     /* assemble the entire matrix onto first processor. */
652     Mat          A;
653     Mat_SeqSBAIJ *Aloc;
654     Mat_SeqBAIJ  *Bloc;
655     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
656     MatScalar    *a;
657 
658     /* Should this be the same type as mat? */
659     ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
660     if (!rank) {
661       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
662     } else {
663       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
664     }
665     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
666     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
667     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
668     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
669 
670     /* copy over the A part */
671     Aloc = (Mat_SeqSBAIJ*)baij->A->data;
672     ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
673     ierr = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
674 
675     for (i=0; i<mbs; i++) {
676       rvals[0] = bs*(baij->rstartbs + i);
677       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
678       for (j=ai[i]; j<ai[i+1]; j++) {
679         col = (baij->cstartbs+aj[j])*bs;
680         for (k=0; k<bs; k++) {
681           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
682           col++;
683           a += bs;
684         }
685       }
686     }
687     /* copy over the B part */
688     Bloc = (Mat_SeqBAIJ*)baij->B->data;
689     ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
690     for (i=0; i<mbs; i++) {
691 
692       rvals[0] = bs*(baij->rstartbs + i);
693       for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
694       for (j=ai[i]; j<ai[i+1]; j++) {
695         col = baij->garray[aj[j]]*bs;
696         for (k=0; k<bs; k++) {
697           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
698           col++;
699           a += bs;
700         }
701       }
702     }
703     ierr = PetscFree(rvals);CHKERRQ(ierr);
704     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     /*
707        Everyone has to call to draw the matrix since the graphics waits are
708        synchronized across all processors that share the PetscDraw object
709     */
710     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
713       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
714       PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);
715       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
716     }
717     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
718     ierr = MatDestroy(&A);CHKERRQ(ierr);
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPISBAIJ"
725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
726 {
727   PetscErrorCode ierr;
728   PetscBool      iascii,isdraw,issocket,isbinary;
729 
730   PetscFunctionBegin;
731   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
732   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
733   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
734   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
735   if (iascii || isdraw || issocket || isbinary) {
736     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
737   }
738   PetscFunctionReturn(0);
739 }
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatDestroy_MPISBAIJ"
743 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
744 {
745   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
746   PetscErrorCode ierr;
747 
748   PetscFunctionBegin;
749 #if defined(PETSC_USE_LOG)
750   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
751 #endif
752   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
753   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
754   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
755   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
756 #if defined(PETSC_USE_CTABLE)
757   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
758 #else
759   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
760 #endif
761   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
762   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
763   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
764   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
765   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
769   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
770   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
771   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
772   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
773   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
774   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
776 #if defined(PETSC_USE_REAL_MAT_SINGLE)
777   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
778 #endif
779   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
780   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
781   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
782   ierr = PetscFree(mat->data);CHKERRQ(ierr);
783 
784   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
785   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",NULL);CHKERRQ(ierr);
786   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",NULL);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",NULL);CHKERRQ(ierr);
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
795 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
796 {
797   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
798   PetscErrorCode ierr;
799   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
800   PetscScalar    *x,*from;
801 
802   PetscFunctionBegin;
803   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
804   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
805 
806   /* diagonal part */
807   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
808   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
809 
810   /* subdiagonal part */
811   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
812 
813   /* copy x into the vec slvec0 */
814   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
815   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
816 
817   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
818   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
822   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823   /* supperdiagonal part */
824   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
825   PetscFunctionReturn(0);
826 }
827 
828 #undef __FUNCT__
829 #define __FUNCT__ "MatMult_MPISBAIJ"
830 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
831 {
832   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
833   PetscErrorCode ierr;
834   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
835   PetscScalar    *x,*from;
836 
837   PetscFunctionBegin;
838   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
839   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
840 
841   /* diagonal part */
842   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
843   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
844 
845   /* subdiagonal part */
846   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
847 
848   /* copy x into the vec slvec0 */
849   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
850   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
851 
852   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
853   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
857   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858   /* supperdiagonal part */
859   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
865 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
866 {
867   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
868   PetscErrorCode ierr;
869   PetscInt       nt;
870 
871   PetscFunctionBegin;
872   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
873   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
874 
875   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
876   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
877 
878   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
879   /* do diagonal part */
880   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
881   /* do supperdiagonal part */
882   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
884   /* do subdiagonal part */
885   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
886   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
887   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
888   PetscFunctionReturn(0);
889 }
890 
891 #undef __FUNCT__
892 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
893 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
894 {
895   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
896   PetscErrorCode ierr;
897   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
898   PetscScalar    *x,*from,zero=0.0;
899 
900   PetscFunctionBegin;
901   /*
902   PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
903   PetscSynchronizedFlush(PetscObjectComm((PetscObject)A));
904   */
905   /* diagonal part */
906   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
907   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
908 
909   /* subdiagonal part */
910   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
911 
912   /* copy x into the vec slvec0 */
913   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
914   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
915   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
916   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
917 
918   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921 
922   /* supperdiagonal part */
923   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
924   PetscFunctionReturn(0);
925 }
926 
927 #undef __FUNCT__
928 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
929 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
930 {
931   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
932   PetscErrorCode ierr;
933 
934   PetscFunctionBegin;
935   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
936   /* do diagonal part */
937   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
938   /* do supperdiagonal part */
939   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
941 
942   /* do subdiagonal part */
943   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
944   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
945   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
949 /*
950   This only works correctly for square matrices where the subblock A->A is the
951    diagonal block
952 */
953 #undef __FUNCT__
954 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
955 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
956 {
957   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
958   PetscErrorCode ierr;
959 
960   PetscFunctionBegin;
961   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
962   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatScale_MPISBAIJ"
968 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
969 {
970   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
971   PetscErrorCode ierr;
972 
973   PetscFunctionBegin;
974   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
975   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
976   PetscFunctionReturn(0);
977 }
978 
979 #undef __FUNCT__
980 #define __FUNCT__ "MatGetRow_MPISBAIJ"
981 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
982 {
983   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
984   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
985   PetscErrorCode ierr;
986   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
987   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
988   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
989 
990   PetscFunctionBegin;
991   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
992   mat->getrowactive = PETSC_TRUE;
993 
994   if (!mat->rowvalues && (idx || v)) {
995     /*
996         allocate enough space to hold information from the longest row.
997     */
998     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
999     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1000     PetscInt     max = 1,mbs = mat->mbs,tmp;
1001     for (i=0; i<mbs; i++) {
1002       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1003       if (max < tmp) max = tmp;
1004     }
1005     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1006   }
1007 
1008   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1009   lrow = row - brstart;  /* local row index */
1010 
1011   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1012   if (!v)   {pvA = 0; pvB = 0;}
1013   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1014   ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1015   ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1016   nztot = nzA + nzB;
1017 
1018   cmap = mat->garray;
1019   if (v  || idx) {
1020     if (nztot) {
1021       /* Sort by increasing column numbers, assuming A and B already sorted */
1022       PetscInt imark = -1;
1023       if (v) {
1024         *v = v_p = mat->rowvalues;
1025         for (i=0; i<nzB; i++) {
1026           if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
1027           else break;
1028         }
1029         imark = i;
1030         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1031         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1032       }
1033       if (idx) {
1034         *idx = idx_p = mat->rowindices;
1035         if (imark > -1) {
1036           for (i=0; i<imark; i++) {
1037             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1038           }
1039         } else {
1040           for (i=0; i<nzB; i++) {
1041             if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1042             else break;
1043           }
1044           imark = i;
1045         }
1046         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1047         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1048       }
1049     } else {
1050       if (idx) *idx = 0;
1051       if (v)   *v   = 0;
1052     }
1053   }
1054   *nz  = nztot;
1055   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1056   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1062 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1063 {
1064   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1065 
1066   PetscFunctionBegin;
1067   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1068   baij->getrowactive = PETSC_FALSE;
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1074 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1075 {
1076   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1077   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1078 
1079   PetscFunctionBegin;
1080   aA->getrow_utriangular = PETSC_TRUE;
1081   PetscFunctionReturn(0);
1082 }
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1085 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1086 {
1087   Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
1088   Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;
1089 
1090   PetscFunctionBegin;
1091   aA->getrow_utriangular = PETSC_FALSE;
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1097 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1098 {
1099   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1100   PetscErrorCode ierr;
1101 
1102   PetscFunctionBegin;
1103   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1104   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1110 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1111 {
1112   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1113   PetscErrorCode ierr;
1114 
1115   PetscFunctionBegin;
1116   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1117   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1123 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1124 {
1125   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1126   PetscErrorCode ierr;
1127 
1128   PetscFunctionBegin;
1129   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1130   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1131   PetscFunctionReturn(0);
1132 }
1133 
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1136 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1137 {
1138   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1139   Mat            A  = a->A,B = a->B;
1140   PetscErrorCode ierr;
1141   PetscReal      isend[5],irecv[5];
1142 
1143   PetscFunctionBegin;
1144   info->block_size = (PetscReal)matin->rmap->bs;
1145 
1146   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1147 
1148   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1149   isend[3] = info->memory;  isend[4] = info->mallocs;
1150 
1151   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1152 
1153   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1154   isend[3] += info->memory;  isend[4] += info->mallocs;
1155   if (flag == MAT_LOCAL) {
1156     info->nz_used      = isend[0];
1157     info->nz_allocated = isend[1];
1158     info->nz_unneeded  = isend[2];
1159     info->memory       = isend[3];
1160     info->mallocs      = isend[4];
1161   } else if (flag == MAT_GLOBAL_MAX) {
1162     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1163 
1164     info->nz_used      = irecv[0];
1165     info->nz_allocated = irecv[1];
1166     info->nz_unneeded  = irecv[2];
1167     info->memory       = irecv[3];
1168     info->mallocs      = irecv[4];
1169   } else if (flag == MAT_GLOBAL_SUM) {
1170     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);
1171 
1172     info->nz_used      = irecv[0];
1173     info->nz_allocated = irecv[1];
1174     info->nz_unneeded  = irecv[2];
1175     info->memory       = irecv[3];
1176     info->mallocs      = irecv[4];
1177   } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1178   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1179   info->fill_ratio_needed = 0;
1180   info->factor_mallocs    = 0;
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1186 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
1187 {
1188   Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
1189   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1190   PetscErrorCode ierr;
1191 
1192   PetscFunctionBegin;
1193   switch (op) {
1194   case MAT_NEW_NONZERO_LOCATIONS:
1195   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1196   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1197   case MAT_KEEP_NONZERO_PATTERN:
1198   case MAT_NEW_NONZERO_LOCATION_ERR:
1199     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1200     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1201     break;
1202   case MAT_ROW_ORIENTED:
1203     a->roworiented = flg;
1204 
1205     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1206     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1207     break;
1208   case MAT_NEW_DIAGONALS:
1209     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1210     break;
1211   case MAT_IGNORE_OFF_PROC_ENTRIES:
1212     a->donotstash = flg;
1213     break;
1214   case MAT_USE_HASH_TABLE:
1215     a->ht_flag = flg;
1216     break;
1217   case MAT_HERMITIAN:
1218     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1219     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1220 
1221     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1222     break;
1223   case MAT_SPD:
1224     A->spd_set = PETSC_TRUE;
1225     A->spd     = flg;
1226     if (flg) {
1227       A->symmetric                  = PETSC_TRUE;
1228       A->structurally_symmetric     = PETSC_TRUE;
1229       A->symmetric_set              = PETSC_TRUE;
1230       A->structurally_symmetric_set = PETSC_TRUE;
1231     }
1232     break;
1233   case MAT_SYMMETRIC:
1234     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1235     break;
1236   case MAT_STRUCTURALLY_SYMMETRIC:
1237     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1238     break;
1239   case MAT_SYMMETRY_ETERNAL:
1240     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1241     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1242     break;
1243   case MAT_IGNORE_LOWER_TRIANGULAR:
1244     aA->ignore_ltriangular = flg;
1245     break;
1246   case MAT_ERROR_LOWER_TRIANGULAR:
1247     aA->ignore_ltriangular = flg;
1248     break;
1249   case MAT_GETROW_UPPERTRIANGULAR:
1250     aA->getrow_utriangular = flg;
1251     break;
1252   default:
1253     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1260 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1261 {
1262   PetscErrorCode ierr;
1263 
1264   PetscFunctionBegin;
1265   if (MAT_INITIAL_MATRIX || *B != A) {
1266     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1267   }
1268   PetscFunctionReturn(0);
1269 }
1270 
1271 #undef __FUNCT__
1272 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1273 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1274 {
1275   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1276   Mat            a     = baij->A, b=baij->B;
1277   PetscErrorCode ierr;
1278   PetscInt       nv,m,n;
1279   PetscBool      flg;
1280 
1281   PetscFunctionBegin;
1282   if (ll != rr) {
1283     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1284     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1285   }
1286   if (!ll) PetscFunctionReturn(0);
1287 
1288   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1289   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1290 
1291   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1292   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1293 
1294   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1295 
1296   /* left diagonalscale the off-diagonal part */
1297   ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);
1298 
1299   /* scale the diagonal part */
1300   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1301 
1302   /* right diagonalscale the off-diagonal part */
1303   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1304   ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
1305   PetscFunctionReturn(0);
1306 }
1307 
1308 #undef __FUNCT__
1309 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1310 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1311 {
1312   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1313   PetscErrorCode ierr;
1314 
1315   PetscFunctionBegin;
1316   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatEqual_MPISBAIJ"
1324 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1325 {
1326   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1327   Mat            a,b,c,d;
1328   PetscBool      flg;
1329   PetscErrorCode ierr;
1330 
1331   PetscFunctionBegin;
1332   a = matA->A; b = matA->B;
1333   c = matB->A; d = matB->B;
1334 
1335   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1336   if (flg) {
1337     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1338   }
1339   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatCopy_MPISBAIJ"
1345 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1346 {
1347   PetscErrorCode ierr;
1348   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1349   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1350 
1351   PetscFunctionBegin;
1352   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1353   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1354     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1355     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1356     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1357   } else {
1358     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1359     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1366 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1367 {
1368   PetscErrorCode ierr;
1369 
1370   PetscFunctionBegin;
1371   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1372   PetscFunctionReturn(0);
1373 }
1374 
1375 #undef __FUNCT__
1376 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1377 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1378 {
1379   PetscErrorCode ierr;
1380   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1381   PetscBLASInt   bnz,one=1;
1382   Mat_SeqSBAIJ   *xa,*ya;
1383   Mat_SeqBAIJ    *xb,*yb;
1384 
1385   PetscFunctionBegin;
1386   if (str == SAME_NONZERO_PATTERN) {
1387     PetscScalar alpha = a;
1388     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1389     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1390     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1391     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1392     xb   = (Mat_SeqBAIJ*)xx->B->data;
1393     yb   = (Mat_SeqBAIJ*)yy->B->data;
1394     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1395     PetscStackCall("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1396   } else {
1397     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1398     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1399     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1400   }
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1406 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1407 {
1408   PetscErrorCode ierr;
1409   PetscInt       i;
1410   PetscBool      flg;
1411 
1412   PetscFunctionBegin;
1413   for (i=0; i<n; i++) {
1414     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1415     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1416   }
1417   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 
1422 /* -------------------------------------------------------------------*/
1423 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1424                                        MatGetRow_MPISBAIJ,
1425                                        MatRestoreRow_MPISBAIJ,
1426                                        MatMult_MPISBAIJ,
1427                                /*  4*/ MatMultAdd_MPISBAIJ,
1428                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1429                                        MatMultAdd_MPISBAIJ,
1430                                        0,
1431                                        0,
1432                                        0,
1433                                /* 10*/ 0,
1434                                        0,
1435                                        0,
1436                                        MatSOR_MPISBAIJ,
1437                                        MatTranspose_MPISBAIJ,
1438                                /* 15*/ MatGetInfo_MPISBAIJ,
1439                                        MatEqual_MPISBAIJ,
1440                                        MatGetDiagonal_MPISBAIJ,
1441                                        MatDiagonalScale_MPISBAIJ,
1442                                        MatNorm_MPISBAIJ,
1443                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1444                                        MatAssemblyEnd_MPISBAIJ,
1445                                        MatSetOption_MPISBAIJ,
1446                                        MatZeroEntries_MPISBAIJ,
1447                                /* 24*/ 0,
1448                                        0,
1449                                        0,
1450                                        0,
1451                                        0,
1452                                /* 29*/ MatSetUp_MPISBAIJ,
1453                                        0,
1454                                        0,
1455                                        0,
1456                                        0,
1457                                /* 34*/ MatDuplicate_MPISBAIJ,
1458                                        0,
1459                                        0,
1460                                        0,
1461                                        0,
1462                                /* 39*/ MatAXPY_MPISBAIJ,
1463                                        MatGetSubMatrices_MPISBAIJ,
1464                                        MatIncreaseOverlap_MPISBAIJ,
1465                                        MatGetValues_MPISBAIJ,
1466                                        MatCopy_MPISBAIJ,
1467                                /* 44*/ 0,
1468                                        MatScale_MPISBAIJ,
1469                                        0,
1470                                        0,
1471                                        0,
1472                                /* 49*/ 0,
1473                                        0,
1474                                        0,
1475                                        0,
1476                                        0,
1477                                /* 54*/ 0,
1478                                        0,
1479                                        MatSetUnfactored_MPISBAIJ,
1480                                        0,
1481                                        MatSetValuesBlocked_MPISBAIJ,
1482                                /* 59*/ 0,
1483                                        0,
1484                                        0,
1485                                        0,
1486                                        0,
1487                                /* 64*/ 0,
1488                                        0,
1489                                        0,
1490                                        0,
1491                                        0,
1492                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1493                                        0,
1494                                        0,
1495                                        0,
1496                                        0,
1497                                /* 74*/ 0,
1498                                        0,
1499                                        0,
1500                                        0,
1501                                        0,
1502                                /* 79*/ 0,
1503                                        0,
1504                                        0,
1505                                        0,
1506                                        MatLoad_MPISBAIJ,
1507                                /* 84*/ 0,
1508                                        0,
1509                                        0,
1510                                        0,
1511                                        0,
1512                                /* 89*/ 0,
1513                                        0,
1514                                        0,
1515                                        0,
1516                                        0,
1517                                /* 94*/ 0,
1518                                        0,
1519                                        0,
1520                                        0,
1521                                        0,
1522                                /* 99*/ 0,
1523                                        0,
1524                                        0,
1525                                        0,
1526                                        0,
1527                                /*104*/ 0,
1528                                        MatRealPart_MPISBAIJ,
1529                                        MatImaginaryPart_MPISBAIJ,
1530                                        MatGetRowUpperTriangular_MPISBAIJ,
1531                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1532                                /*109*/ 0,
1533                                        0,
1534                                        0,
1535                                        0,
1536                                        0,
1537                                /*114*/ 0,
1538                                        0,
1539                                        0,
1540                                        0,
1541                                        0,
1542                                /*119*/ 0,
1543                                        0,
1544                                        0,
1545                                        0,
1546                                        0,
1547                                /*124*/ 0,
1548                                        0,
1549                                        0,
1550                                        0,
1551                                        0,
1552                                /*129*/ 0,
1553                                        0,
1554                                        0,
1555                                        0,
1556                                        0,
1557                                /*134*/ 0,
1558                                        0,
1559                                        0,
1560                                        0,
1561                                        0,
1562                                /*139*/ 0,
1563                                        0
1564 };
1565 
1566 
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1569 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1570 {
1571   PetscFunctionBegin;
1572   *a = ((Mat_MPISBAIJ*)A->data)->A;
1573   PetscFunctionReturn(0);
1574 }
1575 
1576 #undef __FUNCT__
1577 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1578 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1579 {
1580   Mat_MPISBAIJ   *b;
1581   PetscErrorCode ierr;
1582   PetscInt       i,mbs,Mbs;
1583 
1584   PetscFunctionBegin;
1585   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1586   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1587   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1588   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1589 
1590   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1591   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1592   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1593   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1594   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1595 
1596   if (d_nnz) {
1597     for (i=0; i<B->rmap->n/bs; i++) {
1598       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1599     }
1600   }
1601   if (o_nnz) {
1602     for (i=0; i<B->rmap->n/bs; i++) {
1603       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1604     }
1605   }
1606 
1607   b   = (Mat_MPISBAIJ*)B->data;
1608   mbs = B->rmap->n/bs;
1609   Mbs = B->rmap->N/bs;
1610   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1611 
1612   B->rmap->bs = bs;
1613   b->bs2      = bs*bs;
1614   b->mbs      = mbs;
1615   b->nbs      = mbs;
1616   b->Mbs      = Mbs;
1617   b->Nbs      = Mbs;
1618 
1619   for (i=0; i<=b->size; i++) {
1620     b->rangebs[i] = B->rmap->range[i]/bs;
1621   }
1622   b->rstartbs = B->rmap->rstart/bs;
1623   b->rendbs   = B->rmap->rend/bs;
1624 
1625   b->cstartbs = B->cmap->rstart/bs;
1626   b->cendbs   = B->cmap->rend/bs;
1627 
1628   if (!B->preallocated) {
1629     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1630     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1631     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1632     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1633     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1634     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1635     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1636     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1637     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1638   }
1639 
1640   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1641   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1642 
1643   B->preallocated = PETSC_TRUE;
1644   PetscFunctionReturn(0);
1645 }
1646 
1647 #undef __FUNCT__
1648 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1649 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1650 {
1651   PetscInt       m,rstart,cstart,cend;
1652   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1653   const PetscInt *JJ    =0;
1654   PetscScalar    *values=0;
1655   PetscErrorCode ierr;
1656 
1657   PetscFunctionBegin;
1658   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1659   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1660   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1661   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1662   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1663   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1664   m      = B->rmap->n/bs;
1665   rstart = B->rmap->rstart/bs;
1666   cstart = B->cmap->rstart/bs;
1667   cend   = B->cmap->rend/bs;
1668 
1669   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1670   ierr = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1671   for (i=0; i<m; i++) {
1672     nz = ii[i+1] - ii[i];
1673     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1674     nz_max = PetscMax(nz_max,nz);
1675     JJ     = jj + ii[i];
1676     for (j=0; j<nz; j++) {
1677       if (*JJ >= cstart) break;
1678       JJ++;
1679     }
1680     d = 0;
1681     for (; j<nz; j++) {
1682       if (*JJ++ >= cend) break;
1683       d++;
1684     }
1685     d_nnz[i] = d;
1686     o_nnz[i] = nz - d;
1687   }
1688   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1689   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1690 
1691   values = (PetscScalar*)V;
1692   if (!values) {
1693     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1694     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1695   }
1696   for (i=0; i<m; i++) {
1697     PetscInt          row    = i + rstart;
1698     PetscInt          ncols  = ii[i+1] - ii[i];
1699     const PetscInt    *icols = jj + ii[i];
1700     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1701     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1702   }
1703 
1704   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1705   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1706   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1707   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1708   PetscFunctionReturn(0);
1709 }
1710 
1711 #if defined(PETSC_HAVE_MUMPS)
1712 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1713 #endif
1714 #if defined(PETSC_HAVE_PASTIX)
1715 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1716 #endif
1717 
1718 /*MC
1719    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1720    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1721    the matrix is stored.
1722 
1723   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1724   can call MatSetOption(Mat, MAT_HERMITIAN);
1725 
1726    Options Database Keys:
1727 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1728 
1729   Level: beginner
1730 
1731 .seealso: MatCreateMPISBAIJ
1732 M*/
1733 
1734 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1735 
1736 #undef __FUNCT__
1737 #define __FUNCT__ "MatCreate_MPISBAIJ"
1738 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1739 {
1740   Mat_MPISBAIJ   *b;
1741   PetscErrorCode ierr;
1742   PetscBool      flg;
1743 
1744   PetscFunctionBegin;
1745   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1746   B->data = (void*)b;
1747   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1748 
1749   B->ops->destroy = MatDestroy_MPISBAIJ;
1750   B->ops->view    = MatView_MPISBAIJ;
1751   B->assembled    = PETSC_FALSE;
1752   B->insertmode   = NOT_SET_VALUES;
1753 
1754   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1755   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1756 
1757   /* build local table of row and column ownerships */
1758   ierr = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1759 
1760   /* build cache for off array entries formed */
1761   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1762 
1763   b->donotstash  = PETSC_FALSE;
1764   b->colmap      = NULL;
1765   b->garray      = NULL;
1766   b->roworiented = PETSC_TRUE;
1767 
1768   /* stuff used in block assembly */
1769   b->barray = 0;
1770 
1771   /* stuff used for matrix vector multiply */
1772   b->lvec    = 0;
1773   b->Mvctx   = 0;
1774   b->slvec0  = 0;
1775   b->slvec0b = 0;
1776   b->slvec1  = 0;
1777   b->slvec1a = 0;
1778   b->slvec1b = 0;
1779   b->sMvctx  = 0;
1780 
1781   /* stuff for MatGetRow() */
1782   b->rowindices   = 0;
1783   b->rowvalues    = 0;
1784   b->getrowactive = PETSC_FALSE;
1785 
1786   /* hash table stuff */
1787   b->ht           = 0;
1788   b->hd           = 0;
1789   b->ht_size      = 0;
1790   b->ht_flag      = PETSC_FALSE;
1791   b->ht_fact      = 0;
1792   b->ht_total_ct  = 0;
1793   b->ht_insert_ct = 0;
1794 
1795   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1796   b->ijonly = PETSC_FALSE;
1797 
1798   b->in_loc = 0;
1799   b->v_loc  = 0;
1800   b->n_loc  = 0;
1801   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1802   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1803   if (flg) {
1804     PetscReal fact = 1.39;
1805     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1806     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1807     if (fact <= 1.0) fact = 1.39;
1808     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1809     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1810   }
1811   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1812 
1813 #if defined(PETSC_HAVE_PASTIX)
1814   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C","MatGetFactor_mpisbaij_pastix",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1815 #endif
1816 #if defined(PETSC_HAVE_MUMPS)
1817   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C","MatGetFactor_sbaij_mumps",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1818 #endif
1819   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C","MatStoreValues_MPISBAIJ",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1820   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C","MatRetrieveValues_MPISBAIJ",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1821   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C","MatGetDiagonalBlock_MPISBAIJ",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1822   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C","MatMPISBAIJSetPreallocation_MPISBAIJ",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1823   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C","MatMPISBAIJSetPreallocationCSR_MPISBAIJ",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1824   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C","MatConvert_MPISBAIJ_MPISBSTRM",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1825 
1826   B->symmetric                  = PETSC_TRUE;
1827   B->structurally_symmetric     = PETSC_TRUE;
1828   B->symmetric_set              = PETSC_TRUE;
1829   B->structurally_symmetric_set = PETSC_TRUE;
1830 
1831   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 /*MC
1836    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1837 
1838    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1839    and MATMPISBAIJ otherwise.
1840 
1841    Options Database Keys:
1842 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1843 
1844   Level: beginner
1845 
1846 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1847 M*/
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1851 /*@C
1852    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1853    the user should preallocate the matrix storage by setting the parameters
1854    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1855    performance can be increased by more than a factor of 50.
1856 
1857    Collective on Mat
1858 
1859    Input Parameters:
1860 +  A - the matrix
1861 .  bs   - size of blockk
1862 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1863            submatrix  (same for all local rows)
1864 .  d_nnz - array containing the number of block nonzeros in the various block rows
1865            in the upper triangular and diagonal part of the in diagonal portion of the local
1866            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1867            for the diagonal entry and set a value even if it is zero.
1868 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1869            submatrix (same for all local rows).
1870 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1871            off-diagonal portion of the local submatrix that is right of the diagonal
1872            (possibly different for each block row) or NULL.
1873 
1874 
1875    Options Database Keys:
1876 .   -mat_no_unroll - uses code that does not unroll the loops in the
1877                      block calculations (much slower)
1878 .   -mat_block_size - size of the blocks to use
1879 
1880    Notes:
1881 
1882    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1883    than it must be used on all processors that share the object for that argument.
1884 
1885    If the *_nnz parameter is given then the *_nz parameter is ignored
1886 
1887    Storage Information:
1888    For a square global matrix we define each processor's diagonal portion
1889    to be its local rows and the corresponding columns (a square submatrix);
1890    each processor's off-diagonal portion encompasses the remainder of the
1891    local matrix (a rectangular submatrix).
1892 
1893    The user can specify preallocated storage for the diagonal part of
1894    the local submatrix with either d_nz or d_nnz (not both).  Set
1895    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1896    memory allocation.  Likewise, specify preallocated storage for the
1897    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1898 
1899    You can call MatGetInfo() to get information on how effective the preallocation was;
1900    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1901    You can also run with the option -info and look for messages with the string
1902    malloc in them to see if additional memory allocation was needed.
1903 
1904    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1905    the figure below we depict these three local rows and all columns (0-11).
1906 
1907 .vb
1908            0 1 2 3 4 5 6 7 8 9 10 11
1909           -------------------
1910    row 3  |  . . . d d d o o o o o o
1911    row 4  |  . . . d d d o o o o o o
1912    row 5  |  . . . d d d o o o o o o
1913           -------------------
1914 .ve
1915 
1916    Thus, any entries in the d locations are stored in the d (diagonal)
1917    submatrix, and any entries in the o locations are stored in the
1918    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1919    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1920 
1921    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1922    plus the diagonal part of the d matrix,
1923    and o_nz should indicate the number of block nonzeros per row in the o matrix
1924 
1925    In general, for PDE problems in which most nonzeros are near the diagonal,
1926    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1927    or you will get TERRIBLE performance; see the users' manual chapter on
1928    matrices.
1929 
1930    Level: intermediate
1931 
1932 .keywords: matrix, block, aij, compressed row, sparse, parallel
1933 
1934 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1935 @*/
1936 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1937 {
1938   PetscErrorCode ierr;
1939 
1940   PetscFunctionBegin;
1941   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1942   PetscValidType(B,1);
1943   PetscValidLogicalCollectiveInt(B,bs,2);
1944   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1945   PetscFunctionReturn(0);
1946 }
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatCreateSBAIJ"
1950 /*@C
1951    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1952    (block compressed row).  For good matrix assembly performance
1953    the user should preallocate the matrix storage by setting the parameters
1954    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1955    performance can be increased by more than a factor of 50.
1956 
1957    Collective on MPI_Comm
1958 
1959    Input Parameters:
1960 +  comm - MPI communicator
1961 .  bs   - size of blockk
1962 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1963            This value should be the same as the local size used in creating the
1964            y vector for the matrix-vector product y = Ax.
1965 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1966            This value should be the same as the local size used in creating the
1967            x vector for the matrix-vector product y = Ax.
1968 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1969 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1970 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1971            submatrix  (same for all local rows)
1972 .  d_nnz - array containing the number of block nonzeros in the various block rows
1973            in the upper triangular portion of the in diagonal portion of the local
1974            (possibly different for each block block row) or NULL.
1975            If you plan to factor the matrix you must leave room for the diagonal entry and
1976            set its value even if it is zero.
1977 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1978            submatrix (same for all local rows).
1979 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1980            off-diagonal portion of the local submatrix (possibly different for
1981            each block row) or NULL.
1982 
1983    Output Parameter:
1984 .  A - the matrix
1985 
1986    Options Database Keys:
1987 .   -mat_no_unroll - uses code that does not unroll the loops in the
1988                      block calculations (much slower)
1989 .   -mat_block_size - size of the blocks to use
1990 .   -mat_mpi - use the parallel matrix data structures even on one processor
1991                (defaults to using SeqBAIJ format on one processor)
1992 
1993    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1994    MatXXXXSetPreallocation() paradgm instead of this routine directly.
1995    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1996 
1997    Notes:
1998    The number of rows and columns must be divisible by blocksize.
1999    This matrix type does not support complex Hermitian operation.
2000 
2001    The user MUST specify either the local or global matrix dimensions
2002    (possibly both).
2003 
2004    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2005    than it must be used on all processors that share the object for that argument.
2006 
2007    If the *_nnz parameter is given then the *_nz parameter is ignored
2008 
2009    Storage Information:
2010    For a square global matrix we define each processor's diagonal portion
2011    to be its local rows and the corresponding columns (a square submatrix);
2012    each processor's off-diagonal portion encompasses the remainder of the
2013    local matrix (a rectangular submatrix).
2014 
2015    The user can specify preallocated storage for the diagonal part of
2016    the local submatrix with either d_nz or d_nnz (not both).  Set
2017    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2018    memory allocation.  Likewise, specify preallocated storage for the
2019    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2020 
2021    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2022    the figure below we depict these three local rows and all columns (0-11).
2023 
2024 .vb
2025            0 1 2 3 4 5 6 7 8 9 10 11
2026           -------------------
2027    row 3  |  . . . d d d o o o o o o
2028    row 4  |  . . . d d d o o o o o o
2029    row 5  |  . . . d d d o o o o o o
2030           -------------------
2031 .ve
2032 
2033    Thus, any entries in the d locations are stored in the d (diagonal)
2034    submatrix, and any entries in the o locations are stored in the
2035    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2036    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2037 
2038    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2039    plus the diagonal part of the d matrix,
2040    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2041    In general, for PDE problems in which most nonzeros are near the diagonal,
2042    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2043    or you will get TERRIBLE performance; see the users' manual chapter on
2044    matrices.
2045 
2046    Level: intermediate
2047 
2048 .keywords: matrix, block, aij, compressed row, sparse, parallel
2049 
2050 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2051 @*/
2052 
2053 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2054 {
2055   PetscErrorCode ierr;
2056   PetscMPIInt    size;
2057 
2058   PetscFunctionBegin;
2059   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2060   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2061   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2062   if (size > 1) {
2063     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2064     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2065   } else {
2066     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2067     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2068   }
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2075 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2076 {
2077   Mat            mat;
2078   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2079   PetscErrorCode ierr;
2080   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2081   PetscScalar    *array;
2082 
2083   PetscFunctionBegin;
2084   *newmat = 0;
2085 
2086   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2087   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2088   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2089   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2090   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2091   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2092 
2093   mat->factortype   = matin->factortype;
2094   mat->preallocated = PETSC_TRUE;
2095   mat->assembled    = PETSC_TRUE;
2096   mat->insertmode   = NOT_SET_VALUES;
2097 
2098   a      = (Mat_MPISBAIJ*)mat->data;
2099   a->bs2 = oldmat->bs2;
2100   a->mbs = oldmat->mbs;
2101   a->nbs = oldmat->nbs;
2102   a->Mbs = oldmat->Mbs;
2103   a->Nbs = oldmat->Nbs;
2104 
2105 
2106   a->size         = oldmat->size;
2107   a->rank         = oldmat->rank;
2108   a->donotstash   = oldmat->donotstash;
2109   a->roworiented  = oldmat->roworiented;
2110   a->rowindices   = 0;
2111   a->rowvalues    = 0;
2112   a->getrowactive = PETSC_FALSE;
2113   a->barray       = 0;
2114   a->rstartbs     = oldmat->rstartbs;
2115   a->rendbs       = oldmat->rendbs;
2116   a->cstartbs     = oldmat->cstartbs;
2117   a->cendbs       = oldmat->cendbs;
2118 
2119   /* hash table stuff */
2120   a->ht           = 0;
2121   a->hd           = 0;
2122   a->ht_size      = 0;
2123   a->ht_flag      = oldmat->ht_flag;
2124   a->ht_fact      = oldmat->ht_fact;
2125   a->ht_total_ct  = 0;
2126   a->ht_insert_ct = 0;
2127 
2128   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2129   if (oldmat->colmap) {
2130 #if defined(PETSC_USE_CTABLE)
2131     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2132 #else
2133     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2134     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2135     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2136 #endif
2137   } else a->colmap = 0;
2138 
2139   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2140     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2141     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2142     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2143   } else a->garray = 0;
2144 
2145   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2146   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2147   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2148   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2149   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2150 
2151   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2152   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2153   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2154   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2155 
2156   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2157   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2158   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2159   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2160   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2161   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2162   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2163   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2164   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2165   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2166   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2167   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2168   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2169 
2170   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2171   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2172   a->sMvctx = oldmat->sMvctx;
2173   ierr      = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2174 
2175   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2176   ierr    = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2177   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2178   ierr    = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2179   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2180   *newmat = mat;
2181   PetscFunctionReturn(0);
2182 }
2183 
2184 #undef __FUNCT__
2185 #define __FUNCT__ "MatLoad_MPISBAIJ"
2186 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2187 {
2188   PetscErrorCode ierr;
2189   PetscInt       i,nz,j,rstart,rend;
2190   PetscScalar    *vals,*buf;
2191   MPI_Comm       comm;
2192   MPI_Status     status;
2193   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2194   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2195   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2196   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2197   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2198   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2199   int            fd;
2200 
2201   PetscFunctionBegin;
2202   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2203   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2204   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2205   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2206 
2207   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2208   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2209   if (!rank) {
2210     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2211     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2212     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2213     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2214   }
2215 
2216   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2217 
2218   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2219   M    = header[1];
2220   N    = header[2];
2221 
2222   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2223   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2224   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2225 
2226   /* If global sizes are set, check if they are consistent with that given in the file */
2227   if (sizesset) {
2228     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2229   }
2230   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2231   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2232 
2233   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2234 
2235   /*
2236      This code adds extra rows to make sure the number of rows is
2237      divisible by the blocksize
2238   */
2239   Mbs        = M/bs;
2240   extra_rows = bs - M + bs*(Mbs);
2241   if (extra_rows == bs) extra_rows = 0;
2242   else                  Mbs++;
2243   if (extra_rows &&!rank) {
2244     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2245   }
2246 
2247   /* determine ownership of all rows */
2248   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2249     mbs = Mbs/size + ((Mbs % size) > rank);
2250     m   = mbs*bs;
2251   } else { /* User Set */
2252     m   = newmat->rmap->n;
2253     mbs = m/bs;
2254   }
2255   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2256   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2257   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2258   rowners[0] = 0;
2259   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2260   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2261   rstart = rowners[rank];
2262   rend   = rowners[rank+1];
2263 
2264   /* distribute row lengths to all processors */
2265   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2266   if (!rank) {
2267     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2268     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2269     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2270     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2271     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2272     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2273     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2274   } else {
2275     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2276   }
2277 
2278   if (!rank) {   /* procs[0] */
2279     /* calculate the number of nonzeros on each processor */
2280     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2281     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2282     for (i=0; i<size; i++) {
2283       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2284         procsnz[i] += rowlengths[j];
2285       }
2286     }
2287     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2288 
2289     /* determine max buffer needed and allocate it */
2290     maxnz = 0;
2291     for (i=0; i<size; i++) {
2292       maxnz = PetscMax(maxnz,procsnz[i]);
2293     }
2294     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2295 
2296     /* read in my part of the matrix column indices  */
2297     nz     = procsnz[0];
2298     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2299     mycols = ibuf;
2300     if (size == 1) nz -= extra_rows;
2301     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2302     if (size == 1) {
2303       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2304     }
2305 
2306     /* read in every ones (except the last) and ship off */
2307     for (i=1; i<size-1; i++) {
2308       nz   = procsnz[i];
2309       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2310       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2311     }
2312     /* read in the stuff for the last proc */
2313     if (size != 1) {
2314       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2315       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2316       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2317       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2318     }
2319     ierr = PetscFree(cols);CHKERRQ(ierr);
2320   } else {  /* procs[i], i>0 */
2321     /* determine buffer space needed for message */
2322     nz = 0;
2323     for (i=0; i<m; i++) nz += locrowlens[i];
2324     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2325     mycols = ibuf;
2326     /* receive message of column indices*/
2327     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2328     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2329     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2330   }
2331 
2332   /* loop over local rows, determining number of off diagonal entries */
2333   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2334   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2335   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2336   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2337   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2338   rowcount = 0;
2339   nzcount  = 0;
2340   for (i=0; i<mbs; i++) {
2341     dcount  = 0;
2342     odcount = 0;
2343     for (j=0; j<bs; j++) {
2344       kmax = locrowlens[rowcount];
2345       for (k=0; k<kmax; k++) {
2346         tmp = mycols[nzcount++]/bs; /* block col. index */
2347         if (!mask[tmp]) {
2348           mask[tmp] = 1;
2349           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2350           else masked1[dcount++] = tmp; /* entry in diag portion */
2351         }
2352       }
2353       rowcount++;
2354     }
2355 
2356     dlens[i]  = dcount;  /* d_nzz[i] */
2357     odlens[i] = odcount; /* o_nzz[i] */
2358 
2359     /* zero out the mask elements we set */
2360     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2361     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2362   }
2363   if (!sizesset) {
2364     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2365   }
2366   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2367   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2368 
2369   if (!rank) {
2370     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2371     /* read in my part of the matrix numerical values  */
2372     nz     = procsnz[0];
2373     vals   = buf;
2374     mycols = ibuf;
2375     if (size == 1) nz -= extra_rows;
2376     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2377     if (size == 1) {
2378       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2379     }
2380 
2381     /* insert into matrix */
2382     jj = rstart*bs;
2383     for (i=0; i<m; i++) {
2384       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2385       mycols += locrowlens[i];
2386       vals   += locrowlens[i];
2387       jj++;
2388     }
2389 
2390     /* read in other processors (except the last one) and ship out */
2391     for (i=1; i<size-1; i++) {
2392       nz   = procsnz[i];
2393       vals = buf;
2394       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2395       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2396     }
2397     /* the last proc */
2398     if (size != 1) {
2399       nz   = procsnz[i] - extra_rows;
2400       vals = buf;
2401       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2402       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2403       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2404     }
2405     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2406 
2407   } else {
2408     /* receive numeric values */
2409     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2410 
2411     /* receive message of values*/
2412     vals   = buf;
2413     mycols = ibuf;
2414     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2415     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2416     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2417 
2418     /* insert into matrix */
2419     jj = rstart*bs;
2420     for (i=0; i<m; i++) {
2421       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2422       mycols += locrowlens[i];
2423       vals   += locrowlens[i];
2424       jj++;
2425     }
2426   }
2427 
2428   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2429   ierr = PetscFree(buf);CHKERRQ(ierr);
2430   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2431   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2432   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2433   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2434   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2435   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2436   PetscFunctionReturn(0);
2437 }
2438 
2439 #undef __FUNCT__
2440 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2441 /*XXXXX@
2442    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2443 
2444    Input Parameters:
2445 .  mat  - the matrix
2446 .  fact - factor
2447 
2448    Not Collective on Mat, each process can have a different hash factor
2449 
2450    Level: advanced
2451 
2452   Notes:
2453    This can also be set by the command line option: -mat_use_hash_table fact
2454 
2455 .keywords: matrix, hashtable, factor, HT
2456 
2457 .seealso: MatSetOption()
2458 @XXXXX*/
2459 
2460 
2461 #undef __FUNCT__
2462 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2463 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2464 {
2465   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2466   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2467   PetscReal      atmp;
2468   PetscReal      *work,*svalues,*rvalues;
2469   PetscErrorCode ierr;
2470   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2471   PetscMPIInt    rank,size;
2472   PetscInt       *rowners_bs,dest,count,source;
2473   PetscScalar    *va;
2474   MatScalar      *ba;
2475   MPI_Status     stat;
2476 
2477   PetscFunctionBegin;
2478   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2479   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2480   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2481 
2482   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2483   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2484 
2485   bs  = A->rmap->bs;
2486   mbs = a->mbs;
2487   Mbs = a->Mbs;
2488   ba  = b->a;
2489   bi  = b->i;
2490   bj  = b->j;
2491 
2492   /* find ownerships */
2493   rowners_bs = A->rmap->range;
2494 
2495   /* each proc creates an array to be distributed */
2496   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2497   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2498 
2499   /* row_max for B */
2500   if (rank != size-1) {
2501     for (i=0; i<mbs; i++) {
2502       ncols = bi[1] - bi[0]; bi++;
2503       brow  = bs*i;
2504       for (j=0; j<ncols; j++) {
2505         bcol = bs*(*bj);
2506         for (kcol=0; kcol<bs; kcol++) {
2507           col  = bcol + kcol;                /* local col index */
2508           col += rowners_bs[rank+1];      /* global col index */
2509           for (krow=0; krow<bs; krow++) {
2510             atmp = PetscAbsScalar(*ba); ba++;
2511             row  = brow + krow;   /* local row index */
2512             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2513             if (work[col] < atmp) work[col] = atmp;
2514           }
2515         }
2516         bj++;
2517       }
2518     }
2519 
2520     /* send values to its owners */
2521     for (dest=rank+1; dest<size; dest++) {
2522       svalues = work + rowners_bs[dest];
2523       count   = rowners_bs[dest+1]-rowners_bs[dest];
2524       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2525     }
2526   }
2527 
2528   /* receive values */
2529   if (rank) {
2530     rvalues = work;
2531     count   = rowners_bs[rank+1]-rowners_bs[rank];
2532     for (source=0; source<rank; source++) {
2533       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2534       /* process values */
2535       for (i=0; i<count; i++) {
2536         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2537       }
2538     }
2539   }
2540 
2541   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2542   ierr = PetscFree(work);CHKERRQ(ierr);
2543   PetscFunctionReturn(0);
2544 }
2545 
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatSOR_MPISBAIJ"
2548 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2549 {
2550   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2551   PetscErrorCode    ierr;
2552   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2553   PetscScalar       *x,*ptr,*from;
2554   Vec               bb1;
2555   const PetscScalar *b;
2556 
2557   PetscFunctionBegin;
2558   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2559   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2560 
2561   if (flag == SOR_APPLY_UPPER) {
2562     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2563     PetscFunctionReturn(0);
2564   }
2565 
2566   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2567     if (flag & SOR_ZERO_INITIAL_GUESS) {
2568       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2569       its--;
2570     }
2571 
2572     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2573     while (its--) {
2574 
2575       /* lower triangular part: slvec0b = - B^T*xx */
2576       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2577 
2578       /* copy xx into slvec0a */
2579       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2580       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2581       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2582       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2583 
2584       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2585 
2586       /* copy bb into slvec1a */
2587       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2588       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2589       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2590       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2591 
2592       /* set slvec1b = 0 */
2593       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2594 
2595       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2596       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2597       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2598       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2599 
2600       /* upper triangular part: bb1 = bb1 - B*x */
2601       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2602 
2603       /* local diagonal sweep */
2604       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2605     }
2606     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2607   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2608     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2609   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
2610     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2611   } else if (flag & SOR_EISENSTAT) {
2612     Vec               xx1;
2613     PetscBool         hasop;
2614     const PetscScalar *diag;
2615     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2616     PetscInt          i,n;
2617 
2618     if (!mat->xx1) {
2619       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2620       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2621     }
2622     xx1 = mat->xx1;
2623     bb1 = mat->bb1;
2624 
2625     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2626 
2627     if (!mat->diag) {
2628       /* this is wrong for same matrix with new nonzero values */
2629       ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
2630       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2631     }
2632     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2633 
2634     if (hasop) {
2635       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2636       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2637     } else {
2638       /*
2639           These two lines are replaced by code that may be a bit faster for a good compiler
2640       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2641       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2642       */
2643       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2644       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2645       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2646       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2647       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2648       if (omega == 1.0) {
2649         for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
2650         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2651       } else {
2652         for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
2653         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2654       }
2655       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2656       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2657       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2658       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2659     }
2660 
2661     /* multiply off-diagonal portion of matrix */
2662     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2663     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2664     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2665     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2666     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2667     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2668     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2669     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2670     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2671     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2672 
2673     /* local sweep */
2674     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2675     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2676   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2677   PetscFunctionReturn(0);
2678 }
2679 
2680 #undef __FUNCT__
2681 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2682 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2683 {
2684   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2685   PetscErrorCode ierr;
2686   Vec            lvec1,bb1;
2687 
2688   PetscFunctionBegin;
2689   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2690   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2691 
2692   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2693     if (flag & SOR_ZERO_INITIAL_GUESS) {
2694       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2695       its--;
2696     }
2697 
2698     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2699     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2700     while (its--) {
2701       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2702 
2703       /* lower diagonal part: bb1 = bb - B^T*xx */
2704       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2705       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2706 
2707       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2708       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2709       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2710 
2711       /* upper diagonal part: bb1 = bb1 - B*x */
2712       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2713       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2714 
2715       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2716 
2717       /* diagonal sweep */
2718       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2719     }
2720     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2721     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2722   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2723   PetscFunctionReturn(0);
2724 }
2725 
2726 #undef __FUNCT__
2727 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2728 /*@
2729      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2730          CSR format the local rows.
2731 
2732    Collective on MPI_Comm
2733 
2734    Input Parameters:
2735 +  comm - MPI communicator
2736 .  bs - the block size, only a block size of 1 is supported
2737 .  m - number of local rows (Cannot be PETSC_DECIDE)
2738 .  n - This value should be the same as the local size used in creating the
2739        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2740        calculated if N is given) For square matrices n is almost always m.
2741 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2742 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2743 .   i - row indices
2744 .   j - column indices
2745 -   a - matrix values
2746 
2747    Output Parameter:
2748 .   mat - the matrix
2749 
2750    Level: intermediate
2751 
2752    Notes:
2753        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2754      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2755      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2756 
2757        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2758 
2759 .keywords: matrix, aij, compressed row, sparse, parallel
2760 
2761 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2762           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2763 @*/
2764 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2765 {
2766   PetscErrorCode ierr;
2767 
2768 
2769   PetscFunctionBegin;
2770   if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2771   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2772   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2773   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2774   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2775   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2776   PetscFunctionReturn(0);
2777 }
2778 
2779 
2780 #undef __FUNCT__
2781 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2782 /*@C
2783    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2784    (the default parallel PETSc format).
2785 
2786    Collective on MPI_Comm
2787 
2788    Input Parameters:
2789 +  A - the matrix
2790 .  bs - the block size
2791 .  i - the indices into j for the start of each local row (starts with zero)
2792 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2793 -  v - optional values in the matrix
2794 
2795    Level: developer
2796 
2797 .keywords: matrix, aij, compressed row, sparse, parallel
2798 
2799 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2800 @*/
2801 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2802 {
2803   PetscErrorCode ierr;
2804 
2805   PetscFunctionBegin;
2806   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2807   PetscFunctionReturn(0);
2808 }
2809 
2810 
2811