
#include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
#include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
#include <../src/mat/impls/sbaij/seq/sbaij.h>
#include <petscblaslapack.h>

extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
  { \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    A->nonzerostate++;\
a_noinsert:; \
    ailen[brow] = nrow; \
  }

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
  { \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low  = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr     = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    B->nonzerostate++;\
b_noinsert:; \
    bilen[brow] = nrow; \
  }
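
/*
   The two macros above store each bs x bs block in column-major order inside
   the value arrays aa/ba, so the scalar entry (row,col) lives at offset
   bs2*_i + bs*cidx + ridx within block _i of its block row.  A minimal
   stand-alone sketch of that addressing (hypothetical helper, not part of
   this file):
*/
#if 0
static PetscInt BlockEntryOffset(PetscInt bs,PetscInt row,PetscInt col,PetscInt blkidx)
{
  PetscInt ridx = row % bs;                /* row within the block */
  PetscInt cidx = col % bs;                /* column within the block */
  return bs*bs*blkidx + bs*cidx + ridx;    /* blocks are column-major */
}
#endif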

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
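
/*
   Illustrative usage sketch (hypothetical caller, not part of this file):
   only the upper triangle of an MPISBAIJ matrix is stored, so a caller
   either supplies entries with i <= j or asks for lower-triangular input
   to be dropped.
*/
#if 0
Mat            sbaij;
PetscInt       n = 100,bs = 2,i = 3,j = 1;   /* i > j: a lower-triangular entry */
PetscScalar    val = 1.0;
PetscErrorCode ierr;

ierr = MatCreate(PETSC_COMM_WORLD,&sbaij);CHKERRQ(ierr);
ierr = MatSetSizes(sbaij,PETSC_DECIDE,PETSC_DECIDE,n,n);CHKERRQ(ierr);
ierr = MatSetType(sbaij,MATMPISBAIJ);CHKERRQ(ierr);
ierr = MatMPISBAIJSetPreallocation(sbaij,bs,5,NULL,5,NULL);CHKERRQ(ierr);
ierr = MatSetOption(sbaij,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
ierr = MatSetValues(sbaij,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr); /* silently dropped, not an error */
ierr = MatAssemblyBegin(sbaij,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
ierr = MatAssemblyEnd(sbaij,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
#endif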
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscBool      roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig  =mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig  =mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros */
  Mat          A     = baij->A;
  Mat_SeqSBAIJ *a    = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt     *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar    *aa   =a->a;

  Mat         B     = baij->B;
  Mat_SeqBAIJ *b    = (Mat_SeqBAIJ*)(B)->data;
  PetscInt    *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar   *ba   =b->a;

  PetscInt  *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt  low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar *ap,*bap;

  /* for stash */
  PetscInt  n_loc, *in_loc = NULL;
  MatScalar *v_loc = NULL;

  PetscFunctionBegin;
  if (v) PetscValidScalarPointer(v,6);
  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc1(n,&baij->v_loc);CHKERRQ(ierr);

      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col  = in[j] - cstart_orig;         /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B    = baij->B;
              b    = (Mat_SeqBAIJ*)(B)->data;
              bimax= b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba   = b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j];
          else             value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        mat->assembled = PETSC_FALSE;
        n_loc          = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray     =baij->barray;
  PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc1(bs2,&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        if (im[i] > in[j]) {
          if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
          else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
        }
        /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++ = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined(PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined(PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          } else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
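
/*
   Illustrative sketch (hypothetical caller, not part of this file): blocked
   insertion addresses block rows/columns, and the value array supplies a
   dense bs x bs block, row-oriented by default.
*/
#if 0
PetscInt    brow = 0,bcol = 2;              /* block indices with brow <= bcol */
PetscScalar block[4] = {1.0,2.0,3.0,4.0};   /* one bs*bs = 2*2 block */
ierr = MatSetValuesBlocked(sbaij,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
#endif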

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs       = mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col  = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined(PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr    = PetscMalloc1(2,&lnorm2);CHKERRQ(ierr);
      ierr    =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr    =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr    = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
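      /* Each off-diagonal block is stored on only one side of the diagonal, so
         its contribution appears once in norm(B)^2 but twice in the full
         symmetric matrix; hence the factor of 2 on sum[1] below. */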
      *norm   = PetscSqrtReal(sum[0] + 2*sum[1]);
      ierr    = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
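      /* For a symmetric matrix the 1-norm (max column sum) equals the
         infinity-norm (max row sum), which is why both cases share this code. */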
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr = PetscMalloc2(mat->cmap->N,&rsum,mat->cmap->N,&rsum2);CHKERRQ(ierr);
      ierr = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz   = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs            = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr  = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
    } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash || mat->nooffprocentries) PetscFunctionReturn(0);

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce((PetscEnum*)&mat->insertmode,(PetscEnum*)&addv,1,MPIU_ENUM,MPI_BOR,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted while others added");
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a   =(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscBool      other_disassembled;
  PetscMPIInt    n;
  PetscBool      r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;
  if (!baij->donotstash &&  !mat->nooffprocentries) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags, and restore the original flags
       after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;

    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) {
          if (row[j] != rstart) break;
        }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i    = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);

    baij->roworiented = r1;
    a->roworiented    = r2;

    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so, we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew) {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPIU_BOOL,MPI_PROD,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);

  baij->rowvalues = 0;

  /* if no new nonzero locations are allowed in the matrix then only set the matrix state the first time through */
  if ((!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) || !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
    PetscObjectState state = baij->A->nonzerostate + baij->B->nonzerostate;
    ierr = MPI_Allreduce(&state,&mat->nonzerostate,1,MPIU_INT64,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
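
/*
   Illustrative sketch (hypothetical caller, not part of this file): entries
   destined for rows owned by another rank are cached in the stashes and are
   only communicated and inserted by the assembly pair processed above.
*/
#if 0
ierr = MatSetValues(sbaij,1,&i,1,&j,&val,ADD_VALUES);CHKERRQ(ierr); /* row i may be off-process */
ierr = MatAssemblyBegin(sbaij,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);    /* posts the stash scatters */
ierr = MatAssemblyEnd(sbaij,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);      /* receives and inserts stashed values */
#endif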

extern PetscErrorCode MatView_SeqSBAIJ_ASCII(Mat,PetscViewer);
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#include <petscdraw.h>
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs   = mat->rmap->bs;
  PetscMPIInt       rank = baij->rank;
  PetscBool         iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw draw;
    PetscBool isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  {
    /* assemble the entire matrix onto the first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(PetscObjectComm((PetscObject)mat),&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,NULL,0,NULL);CHKERRQ(ierr);
    ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
    ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc = (Mat_SeqSBAIJ*)baij->A->data;
    ai   = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr = PetscMalloc1(bs,&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai   = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) rvals[j] = rvals[j-1] + 1;
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++;
          a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call this to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
        ierr = MatView_SeqSBAIJ_ASCII(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(&A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscBool      iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
#if defined(PETSC_USE_CTABLE)
  ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
  ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
  ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
  ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
#if defined(PETSC_USE_REAL_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(mat->data);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C",NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C",NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

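/*
   In the MatMult routines below only the upper triangle of the matrix is
   stored, so y = A*x is assembled from three pieces: y = A_d*x (diagonal
   block) + B*x_ghost (superdiagonal blocks) + the scattered contribution of
   B^T*x (subdiagonal blocks).  The sMvctx scatter on the work vectors
   slvec0/slvec1 both delivers the ghost values of x and accumulates the
   B^T*x contributions onto the owning ranks.
*/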
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");

  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(PetscObjectComm((PetscObject)A)," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(PetscObjectComm((PetscObject)A),PETSC_STDOUT);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) max = tmp;
    }
    ierr = PetscMalloc2(max*bs2,&mat->rowvalues,max*bs2,&mat->rowindices);CHKERRQ(ierr);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr  = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr  = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart) idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz  = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

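/*
   Illustrative usage sketch (hypothetical caller, not part of this file):
   for the SBAIJ formats, MatGetRow() must be bracketed by
   MatGetRowUpperTriangular()/MatRestoreRowUpperTriangular().
*/
#if 0
const PetscInt    *cols;
const PetscScalar *vals;
PetscInt          row,ncols,rstart,rend;
ierr = MatGetOwnershipRange(sbaij,&rstart,&rend);CHKERRQ(ierr);
ierr = MatGetRowUpperTriangular(sbaij);CHKERRQ(ierr);
for (row=rstart; row<rend; row++) {                    /* only locally owned rows */
  ierr = MatGetRow(sbaij,row,&ncols,&cols,&vals);CHKERRQ(ierr);
  /* ... use ncols, cols[], vals[] (upper-triangular part only) ... */
  ierr = MatRestoreRow(sbaij,row,&ncols,&cols,&vals);CHKERRQ(ierr);
}
ierr = MatRestoreRowUpperTriangular(sbaij);CHKERRQ(ierr);
#endif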

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A  = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;

  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;

  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);

  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,PetscObjectComm((PetscObject)matin));CHKERRQ(ierr);

    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool flg)
{
  Mat_MPISBAIJ   *a  = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;

    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);

    A->ops->mult = MatMult_MPISBAIJ_Hermitian;
    break;
  case MAT_SPD:
    A->spd_set = PETSC_TRUE;
    A->spd     = flg;
    if (flg) {
      A->symmetric                  = PETSC_TRUE;
      A->structurally_symmetric     = PETSC_TRUE;
      A->symmetric_set              = PETSC_TRUE;
      A->structurally_symmetric_set = PETSC_TRUE;
    }
    break;
  case MAT_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_STRUCTURALLY_SYMMETRIC:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    break;
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a     = baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscBool      flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1309 
1310 #undef __FUNCT__
1311 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1312 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1313 {
1314   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1315   PetscErrorCode ierr;
1316 
1317   PetscFunctionBegin;
1318   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat*);
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatEqual_MPISBAIJ"
1326 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1327 {
1328   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1329   Mat            a,b,c,d;
1330   PetscBool      flg;
1331   PetscErrorCode ierr;
1332 
1333   PetscFunctionBegin;
1334   a = matA->A; b = matA->B;
1335   c = matB->A; d = matB->B;
1336 
1337   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1338   if (flg) {
1339     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1340   }
1341   ierr = MPI_Allreduce(&flg,flag,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 #undef __FUNCT__
1346 #define __FUNCT__ "MatCopy_MPISBAIJ"
1347 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1348 {
1349   PetscErrorCode ierr;
1350   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1351   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)B->data;
1352 
1353   PetscFunctionBegin;
1354   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1355   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1356     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1357     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1358     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1359   } else {
1360     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1361     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1368 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1369 {
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
1377 #undef __FUNCT__
1378 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1379 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1380 {
1381   PetscErrorCode ierr;
1382   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ*)X->data,*yy=(Mat_MPISBAIJ*)Y->data;
1383   PetscBLASInt   bnz,one=1;
1384   Mat_SeqSBAIJ   *xa,*ya;
1385   Mat_SeqBAIJ    *xb,*yb;
1386 
1387   PetscFunctionBegin;
1388   if (str == SAME_NONZERO_PATTERN) {
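    /* identical nonzero patterns: apply y += a*x directly to the stored nonzero arrays
       of the diagonal (A) and off-diagonal (B) blocks */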
1389     PetscScalar alpha = a;
1390     xa   = (Mat_SeqSBAIJ*)xx->A->data;
1391     ya   = (Mat_SeqSBAIJ*)yy->A->data;
1392     ierr = PetscBLASIntCast(xa->nz,&bnz);CHKERRQ(ierr);
1393     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one));
1394     xb   = (Mat_SeqBAIJ*)xx->B->data;
1395     yb   = (Mat_SeqBAIJ*)yy->B->data;
1396     ierr = PetscBLASIntCast(xb->nz,&bnz);CHKERRQ(ierr);
1397     PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one));
1398   } else {
1399     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1400     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1401     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1402   }
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1408 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1409 {
1410   PetscErrorCode ierr;
1411   PetscInt       i;
1412   PetscBool      flg;
1413 
1414   PetscFunctionBegin;
1415   for (i=0; i<n; i++) {
1416     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1417     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1418   }
1419   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 
1424 /* -------------------------------------------------------------------*/
1425 static struct _MatOps MatOps_Values = {MatSetValues_MPISBAIJ,
1426                                        MatGetRow_MPISBAIJ,
1427                                        MatRestoreRow_MPISBAIJ,
1428                                        MatMult_MPISBAIJ,
1429                                /*  4*/ MatMultAdd_MPISBAIJ,
1430                                        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1431                                        MatMultAdd_MPISBAIJ,
1432                                        0,
1433                                        0,
1434                                        0,
1435                                /* 10*/ 0,
1436                                        0,
1437                                        0,
1438                                        MatSOR_MPISBAIJ,
1439                                        MatTranspose_MPISBAIJ,
1440                                /* 15*/ MatGetInfo_MPISBAIJ,
1441                                        MatEqual_MPISBAIJ,
1442                                        MatGetDiagonal_MPISBAIJ,
1443                                        MatDiagonalScale_MPISBAIJ,
1444                                        MatNorm_MPISBAIJ,
1445                                /* 20*/ MatAssemblyBegin_MPISBAIJ,
1446                                        MatAssemblyEnd_MPISBAIJ,
1447                                        MatSetOption_MPISBAIJ,
1448                                        MatZeroEntries_MPISBAIJ,
1449                                /* 24*/ 0,
1450                                        0,
1451                                        0,
1452                                        0,
1453                                        0,
1454                                /* 29*/ MatSetUp_MPISBAIJ,
1455                                        0,
1456                                        0,
1457                                        0,
1458                                        0,
1459                                /* 34*/ MatDuplicate_MPISBAIJ,
1460                                        0,
1461                                        0,
1462                                        0,
1463                                        0,
1464                                /* 39*/ MatAXPY_MPISBAIJ,
1465                                        MatGetSubMatrices_MPISBAIJ,
1466                                        MatIncreaseOverlap_MPISBAIJ,
1467                                        MatGetValues_MPISBAIJ,
1468                                        MatCopy_MPISBAIJ,
1469                                /* 44*/ 0,
1470                                        MatScale_MPISBAIJ,
1471                                        0,
1472                                        0,
1473                                        0,
1474                                /* 49*/ 0,
1475                                        0,
1476                                        0,
1477                                        0,
1478                                        0,
1479                                /* 54*/ 0,
1480                                        0,
1481                                        MatSetUnfactored_MPISBAIJ,
1482                                        0,
1483                                        MatSetValuesBlocked_MPISBAIJ,
1484                                /* 59*/ 0,
1485                                        0,
1486                                        0,
1487                                        0,
1488                                        0,
1489                                /* 64*/ 0,
1490                                        0,
1491                                        0,
1492                                        0,
1493                                        0,
1494                                /* 69*/ MatGetRowMaxAbs_MPISBAIJ,
1495                                        0,
1496                                        0,
1497                                        0,
1498                                        0,
1499                                /* 74*/ 0,
1500                                        0,
1501                                        0,
1502                                        0,
1503                                        0,
1504                                /* 79*/ 0,
1505                                        0,
1506                                        0,
1507                                        0,
1508                                        MatLoad_MPISBAIJ,
1509                                /* 84*/ 0,
1510                                        0,
1511                                        0,
1512                                        0,
1513                                        0,
1514                                /* 89*/ 0,
1515                                        0,
1516                                        0,
1517                                        0,
1518                                        0,
1519                                /* 94*/ 0,
1520                                        0,
1521                                        0,
1522                                        0,
1523                                        0,
1524                                /* 99*/ 0,
1525                                        0,
1526                                        0,
1527                                        0,
1528                                        0,
1529                                /*104*/ 0,
1530                                        MatRealPart_MPISBAIJ,
1531                                        MatImaginaryPart_MPISBAIJ,
1532                                        MatGetRowUpperTriangular_MPISBAIJ,
1533                                        MatRestoreRowUpperTriangular_MPISBAIJ,
1534                                /*109*/ 0,
1535                                        0,
1536                                        0,
1537                                        0,
1538                                        0,
1539                                /*114*/ 0,
1540                                        0,
1541                                        0,
1542                                        0,
1543                                        0,
1544                                /*119*/ 0,
1545                                        0,
1546                                        0,
1547                                        0,
1548                                        0,
1549                                /*124*/ 0,
1550                                        0,
1551                                        0,
1552                                        0,
1553                                        0,
1554                                /*129*/ 0,
1555                                        0,
1556                                        0,
1557                                        0,
1558                                        0,
1559                                /*134*/ 0,
1560                                        0,
1561                                        0,
1562                                        0,
1563                                        0,
1564                                /*139*/ 0,
1565                                        0,
1566                                        0
1567 };
1568 
1569 
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1572 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1573 {
1574   PetscFunctionBegin;
1575   *a = ((Mat_MPISBAIJ*)A->data)->A;
1576   PetscFunctionReturn(0);
1577 }
1578 
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1581 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt *d_nnz,PetscInt o_nz,const PetscInt *o_nnz)
1582 {
1583   Mat_MPISBAIJ   *b;
1584   PetscErrorCode ierr;
1585   PetscInt       i,mbs,Mbs;
1586 
1587   PetscFunctionBegin;
1588   ierr = MatSetBlockSize(B,PetscAbs(bs));CHKERRQ(ierr);
1589   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1590   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1591   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1592 
1593   b   = (Mat_MPISBAIJ*)B->data;
1594   mbs = B->rmap->n/bs;
1595   Mbs = B->rmap->N/bs;
1596   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1597 
1598   B->rmap->bs = bs;
1599   b->bs2      = bs*bs;
1600   b->mbs      = mbs;
1601   b->nbs      = mbs;
1602   b->Mbs      = Mbs;
1603   b->Nbs      = Mbs;
1604 
1605   for (i=0; i<=b->size; i++) {
1606     b->rangebs[i] = B->rmap->range[i]/bs;
1607   }
1608   b->rstartbs = B->rmap->rstart/bs;
1609   b->rendbs   = B->rmap->rend/bs;
1610 
1611   b->cstartbs = B->cmap->rstart/bs;
1612   b->cendbs   = B->cmap->rend/bs;
1613 
1614   if (!B->preallocated) {
1615     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1616     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1617     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1618     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->A);CHKERRQ(ierr);
1619     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1620     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1621     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1622     ierr = PetscLogObjectParent((PetscObject)B,(PetscObject)b->B);CHKERRQ(ierr);
1623     ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),bs,&B->bstash);CHKERRQ(ierr);
1624   }
1625 
1626   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1627   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1628 
1629   B->preallocated = PETSC_TRUE;
1630   PetscFunctionReturn(0);
1631 }
1632 
1633 #undef __FUNCT__
1634 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
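/*
   Sketch of the expected input (illustrative values, not from any particular caller):
   ii/jj describe the locally owned block rows in block-CSR form using global block
   column indices. For example, a process owning two block rows, with upper-triangular
   blocks in block columns {0,2} of its first row and {1} of its second, passes
       ii = {0,2,3},   jj = {0,2,1}
   and, when V is non-NULL, bs*bs scalars per block stored in the same order.
*/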
1635 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1636 {
1637   PetscInt       m,rstart,cstart,cend;
1638   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1639   const PetscInt *JJ    =0;
1640   PetscScalar    *values=0;
1641   PetscErrorCode ierr;
1642 
1643   PetscFunctionBegin;
1644   if (bs < 1) SETERRQ1(PetscObjectComm((PetscObject)B),PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1645   ierr   = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1646   ierr   = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1647   ierr   = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1648   ierr   = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1649   ierr   = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1650   m      = B->rmap->n/bs;
1651   rstart = B->rmap->rstart/bs;
1652   cstart = B->cmap->rstart/bs;
1653   cend   = B->cmap->rend/bs;
1654 
1655   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1656   ierr = PetscMalloc2(m,&d_nnz,m,&o_nnz);CHKERRQ(ierr);
1657   for (i=0; i<m; i++) {
1658     nz = ii[i+1] - ii[i];
1659     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1660     nz_max = PetscMax(nz_max,nz);
1661     JJ     = jj + ii[i];
1662     for (j=0; j<nz; j++) {
1663       if (*JJ >= cstart) break;
1664       JJ++;
1665     }
1666     d = 0;
1667     for (; j<nz; j++) {
1668       if (*JJ++ >= cend) break;
1669       d++;
1670     }
1671     d_nnz[i] = d;
1672     o_nnz[i] = nz - d;
1673   }
1674   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1675   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1676 
1677   values = (PetscScalar*)V;
1678   if (!values) {
1679     ierr = PetscMalloc1(bs*bs*nz_max,&values);CHKERRQ(ierr);
1680     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1681   }
1682   for (i=0; i<m; i++) {
1683     PetscInt          row    = i + rstart;
1684     PetscInt          ncols  = ii[i+1] - ii[i];
1685     const PetscInt    *icols = jj + ii[i];
1686     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1687     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1688   }
1689 
1690   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1691   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1692   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1693   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1694   PetscFunctionReturn(0);
1695 }
1696 
1697 #if defined(PETSC_HAVE_MUMPS)
1698 PETSC_EXTERN PetscErrorCode MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1699 #endif
1700 #if defined(PETSC_HAVE_PASTIX)
1701 PETSC_EXTERN PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1702 #endif
1703 
1704 /*MC
1705    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1706    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1707    the matrix is stored.
1708 
1709   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian
1710   symmetric call, for example,
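.vb
   MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);
.ve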
1711 
1712    Options Database Keys:
1713 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1714 
1715   Level: beginner
1716 
1717 .seealso: MatCreateMPISBAIJ
1718 M*/
1719 
1720 PETSC_EXTERN PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,MatType,MatReuse,Mat*);
1721 
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatCreate_MPISBAIJ"
1724 PETSC_EXTERN PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1725 {
1726   Mat_MPISBAIJ   *b;
1727   PetscErrorCode ierr;
1728   PetscBool      flg;
1729 
1730   PetscFunctionBegin;
1731   ierr    = PetscNewLog(B,&b);CHKERRQ(ierr);
1732   B->data = (void*)b;
1733   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1734 
1735   B->ops->destroy = MatDestroy_MPISBAIJ;
1736   B->ops->view    = MatView_MPISBAIJ;
1737   B->assembled    = PETSC_FALSE;
1738   B->insertmode   = NOT_SET_VALUES;
1739 
1740   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)B),&b->rank);CHKERRQ(ierr);
1741   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)B),&b->size);CHKERRQ(ierr);
1742 
1743   /* build local table of row and column ownerships */
1744   ierr = PetscMalloc1((b->size+2),&b->rangebs);CHKERRQ(ierr);
1745 
1746   /* build cache for off array entries formed */
1747   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)B),1,&B->stash);CHKERRQ(ierr);
1748 
1749   b->donotstash  = PETSC_FALSE;
1750   b->colmap      = NULL;
1751   b->garray      = NULL;
1752   b->roworiented = PETSC_TRUE;
1753 
1754   /* stuff used in block assembly */
1755   b->barray = 0;
1756 
1757   /* stuff used for matrix vector multiply */
1758   b->lvec    = 0;
1759   b->Mvctx   = 0;
1760   b->slvec0  = 0;
1761   b->slvec0b = 0;
1762   b->slvec1  = 0;
1763   b->slvec1a = 0;
1764   b->slvec1b = 0;
1765   b->sMvctx  = 0;
1766 
1767   /* stuff for MatGetRow() */
1768   b->rowindices   = 0;
1769   b->rowvalues    = 0;
1770   b->getrowactive = PETSC_FALSE;
1771 
1772   /* hash table stuff */
1773   b->ht           = 0;
1774   b->hd           = 0;
1775   b->ht_size      = 0;
1776   b->ht_flag      = PETSC_FALSE;
1777   b->ht_fact      = 0;
1778   b->ht_total_ct  = 0;
1779   b->ht_insert_ct = 0;
1780 
1781   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1782   b->ijonly = PETSC_FALSE;
1783 
1784   b->in_loc = 0;
1785   b->v_loc  = 0;
1786   b->n_loc  = 0;
1787   ierr      = PetscOptionsBegin(PetscObjectComm((PetscObject)B),NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1788   ierr      = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,NULL);CHKERRQ(ierr);
1789   if (flg) {
1790     PetscReal fact = 1.39;
1791     ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1792     ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,NULL);CHKERRQ(ierr);
1793     if (fact <= 1.0) fact = 1.39;
1794     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1795     ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1796   }
1797   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1798 
1799 #if defined(PETSC_HAVE_PASTIX)
1800   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_pastix_C",MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1801 #endif
1802 #if defined(PETSC_HAVE_MUMPS)
1803   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetFactor_mumps_C",MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1804 #endif
1805   ierr = PetscObjectComposeFunction((PetscObject)B,"MatStoreValues_C",MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1806   ierr = PetscObjectComposeFunction((PetscObject)B,"MatRetrieveValues_C",MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1807   ierr = PetscObjectComposeFunction((PetscObject)B,"MatGetDiagonalBlock_C",MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1808   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1809   ierr = PetscObjectComposeFunction((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1810   ierr = PetscObjectComposeFunction((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1811 
1812   B->symmetric                  = PETSC_TRUE;
1813   B->structurally_symmetric     = PETSC_TRUE;
1814   B->symmetric_set              = PETSC_TRUE;
1815   B->structurally_symmetric_set = PETSC_TRUE;
1816 
1817   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1818   PetscFunctionReturn(0);
1819 }
1820 
1821 /*MC
1822    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1823 
1824    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1825    and MATMPISBAIJ otherwise.
1826 
1827    Options Database Keys:
1828 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1829 
1830   Level: beginner
1831 
1832 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1833 M*/
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1837 /*@C
1838    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1839    the user should preallocate the matrix storage by setting the parameters
1840    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1841    performance can be increased by more than a factor of 50.
1842 
1843    Collective on Mat
1844 
1845    Input Parameters:
1846 +  A - the matrix
1847 .  bs   - size of block
1848 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1849            submatrix  (same for all local rows)
1850 .  d_nnz - array containing the number of block nonzeros in the various block rows
1851            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1852            (possibly different for each block row) or NULL.  If you plan to factor the matrix you must leave room
1853            for the diagonal entry and set a value even if it is zero.
1854 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1855            submatrix (same for all local rows).
1856 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1857            off-diagonal portion of the local submatrix that is right of the diagonal
1858            (possibly different for each block row) or NULL.
1859 
1860 
1861    Options Database Keys:
1862 +   -mat_no_unroll - uses code that does not unroll the loops in the
1863                      block calculations (much slower)
1864 -   -mat_block_size - size of the blocks to use
1865 
1866    Notes:
1867 
1868    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1869    then it must be used on all processors that share the object for that argument.
1870 
1871    If the *_nnz parameter is given then the *_nz parameter is ignored
1872 
1873    Storage Information:
1874    For a square global matrix we define each processor's diagonal portion
1875    to be its local rows and the corresponding columns (a square submatrix);
1876    each processor's off-diagonal portion encompasses the remainder of the
1877    local matrix (a rectangular submatrix).
1878 
1879    The user can specify preallocated storage for the diagonal part of
1880    the local submatrix with either d_nz or d_nnz (not both).  Set
1881    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
1882    memory allocation.  Likewise, specify preallocated storage for the
1883    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1884 
1885    You can call MatGetInfo() to get information on how effective the preallocation was;
1886    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1887    You can also run with the option -info and look for messages with the string
1888    malloc in them to see if additional memory allocation was needed.
1889 
1890    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1891    the figure below we depict these three local rows and all columns (0-11).
1892 
1893 .vb
1894            0 1 2 3 4 5 6 7 8 9 10 11
1895           --------------------------
1896    row 3  |. . . d d d o o o o  o  o
1897    row 4  |. . . d d d o o o o  o  o
1898    row 5  |. . . d d d o o o o  o  o
1899           --------------------------
1900 .ve
1901 
1902    Thus, any entries in the d locations are stored in the d (diagonal)
1903    submatrix, and any entries in the o locations are stored in the
1904    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1905    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1906 
1907    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1908    plus the diagonal part of the d matrix,
1909    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1910 
1911    In general, for PDE problems in which most nonzeros are near the diagonal,
1912    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1913    or you will get TERRIBLE performance; see the users' manual chapter on
1914    matrices.
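   A minimal usage sketch (the block size and preallocation counts here are purely illustrative):
.vb
   Mat A;
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,3,NULL,1,NULL);
.ve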
1915 
1916    Level: intermediate
1917 
1918 .keywords: matrix, block, aij, compressed row, sparse, parallel
1919 
1920 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ(), PetscSplitOwnership()
1921 @*/
1922 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1923 {
1924   PetscErrorCode ierr;
1925 
1926   PetscFunctionBegin;
1927   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1928   PetscValidType(B,1);
1929   PetscValidLogicalCollectiveInt(B,bs,2);
1930   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1931   PetscFunctionReturn(0);
1932 }
1933 
1934 #undef __FUNCT__
1935 #define __FUNCT__ "MatCreateSBAIJ"
1936 /*@C
1937    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1938    (block compressed row).  For good matrix assembly performance
1939    the user should preallocate the matrix storage by setting the parameters
1940    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1941    performance can be increased by more than a factor of 50.
1942 
1943    Collective on MPI_Comm
1944 
1945    Input Parameters:
1946 +  comm - MPI communicator
1947 .  bs   - size of block
1948 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1949            This value should be the same as the local size used in creating the
1950            y vector for the matrix-vector product y = Ax.
1951 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1952            This value should be the same as the local size used in creating the
1953            x vector for the matrix-vector product y = Ax.
1954 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1955 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1956 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1957            submatrix  (same for all local rows)
1958 .  d_nnz - array containing the number of block nonzeros in the various block rows
1959            in the upper triangular portion of the diagonal portion of the local submatrix
1960            (possibly different for each block row) or NULL.
1961            If you plan to factor the matrix you must leave room for the diagonal entry and
1962            set its value even if it is zero.
1963 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1964            submatrix (same for all local rows).
1965 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1966            off-diagonal portion of the local submatrix (possibly different for
1967            each block row) or NULL.
1968 
1969    Output Parameter:
1970 .  A - the matrix
1971 
1972    Options Database Keys:
1973 +   -mat_no_unroll - uses code that does not unroll the loops in the
1974                      block calculations (much slower)
1975 .   -mat_block_size - size of the blocks to use
1976 -   -mat_mpi - use the parallel matrix data structures even on one processor
1977                (defaults to using SeqBAIJ format on one processor)
1978 
1979    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1980    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1981    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1982 
1983    Notes:
1984    The number of rows and columns must be divisible by blocksize.
1985    This matrix type does not support complex Hermitian operation.
1986 
1987    The user MUST specify either the local or global matrix dimensions
1988    (possibly both).
1989 
1990    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1991    then it must be used on all processors that share the object for that argument.
1992 
1993    If the *_nnz parameter is given then the *_nz parameter is ignored
1994 
1995    Storage Information:
1996    For a square global matrix we define each processor's diagonal portion
1997    to be its local rows and the corresponding columns (a square submatrix);
1998    each processor's off-diagonal portion encompasses the remainder of the
1999    local matrix (a rectangular submatrix).
2000 
2001    The user can specify preallocated storage for the diagonal part of
2002    the local submatrix with either d_nz or d_nnz (not both).  Set
2003    d_nz=PETSC_DEFAULT and d_nnz=NULL for PETSc to control dynamic
2004    memory allocation.  Likewise, specify preallocated storage for the
2005    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2006 
2007    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2008    the figure below we depict these three local rows and all columns (0-11).
2009 
2010 .vb
2011            0 1 2 3 4 5 6 7 8 9 10 11
2012           --------------------------
2013    row 3  |. . . d d d o o o o  o  o
2014    row 4  |. . . d d d o o o o  o  o
2015    row 5  |. . . d d d o o o o  o  o
2016           --------------------------
2017 .ve
2018 
2019    Thus, any entries in the d locations are stored in the d (diagonal)
2020    submatrix, and any entries in the o locations are stored in the
2021    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2022    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2023 
2024    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2025    plus the diagonal part of the d matrix,
2026    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2027    In general, for PDE problems in which most nonzeros are near the diagonal,
2028    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2029    or you will get TERRIBLE performance; see the users' manual chapter on
2030    matrices.
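   A minimal usage sketch (communicator, sizes, and preallocation counts are illustrative):
.vb
   Mat A;
   MatCreateSBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,5,NULL,2,NULL,&A);
   /* insert values with MatSetValuesBlocked(), then MatAssemblyBegin()/MatAssemblyEnd() */
.ve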
2031 
2032    Level: intermediate
2033 
2034 .keywords: matrix, block, aij, compressed row, sparse, parallel
2035 
2036 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2037 @*/
2038 
2039 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2040 {
2041   PetscErrorCode ierr;
2042   PetscMPIInt    size;
2043 
2044   PetscFunctionBegin;
2045   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2046   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2047   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2048   if (size > 1) {
2049     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2050     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2051   } else {
2052     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2053     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2054   }
2055   PetscFunctionReturn(0);
2056 }
2057 
2058 
2059 #undef __FUNCT__
2060 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2061 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2062 {
2063   Mat            mat;
2064   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2065   PetscErrorCode ierr;
2066   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2067   PetscScalar    *array;
2068 
2069   PetscFunctionBegin;
2070   *newmat = 0;
2071 
2072   ierr = MatCreate(PetscObjectComm((PetscObject)matin),&mat);CHKERRQ(ierr);
2073   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2074   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2075   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2076   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2077   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2078 
2079   mat->factortype   = matin->factortype;
2080   mat->preallocated = PETSC_TRUE;
2081   mat->assembled    = PETSC_TRUE;
2082   mat->insertmode   = NOT_SET_VALUES;
2083 
2084   a      = (Mat_MPISBAIJ*)mat->data;
2085   a->bs2 = oldmat->bs2;
2086   a->mbs = oldmat->mbs;
2087   a->nbs = oldmat->nbs;
2088   a->Mbs = oldmat->Mbs;
2089   a->Nbs = oldmat->Nbs;
2090 
2091 
2092   a->size         = oldmat->size;
2093   a->rank         = oldmat->rank;
2094   a->donotstash   = oldmat->donotstash;
2095   a->roworiented  = oldmat->roworiented;
2096   a->rowindices   = 0;
2097   a->rowvalues    = 0;
2098   a->getrowactive = PETSC_FALSE;
2099   a->barray       = 0;
2100   a->rstartbs     = oldmat->rstartbs;
2101   a->rendbs       = oldmat->rendbs;
2102   a->cstartbs     = oldmat->cstartbs;
2103   a->cendbs       = oldmat->cendbs;
2104 
2105   /* hash table stuff */
2106   a->ht           = 0;
2107   a->hd           = 0;
2108   a->ht_size      = 0;
2109   a->ht_flag      = oldmat->ht_flag;
2110   a->ht_fact      = oldmat->ht_fact;
2111   a->ht_total_ct  = 0;
2112   a->ht_insert_ct = 0;
2113 
2114   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2115   if (oldmat->colmap) {
2116 #if defined(PETSC_USE_CTABLE)
2117     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2118 #else
2119     ierr = PetscMalloc1((a->Nbs),&a->colmap);CHKERRQ(ierr);
2120     ierr = PetscLogObjectMemory((PetscObject)mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2121     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2122 #endif
2123   } else a->colmap = 0;
2124 
2125   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2126     ierr = PetscMalloc1(len,&a->garray);CHKERRQ(ierr);
2127     ierr = PetscLogObjectMemory((PetscObject)mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2128     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2129   } else a->garray = 0;
2130 
2131   ierr = MatStashCreate_Private(PetscObjectComm((PetscObject)matin),matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2132   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->lvec);CHKERRQ(ierr);
2134   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->Mvctx);CHKERRQ(ierr);
2136 
2137   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2139   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2140   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2141 
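  /* slvec1 is laid out as [bs*mbs locally owned entries | ghost entries]; slvec1a, slvec1b,
     and slvec0b below are sequential vectors sharing those contiguous pieces of storage */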
2142   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2143   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2144   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2145   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2146   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2147   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2148   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2149   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2150   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0);CHKERRQ(ierr);
2151   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1);CHKERRQ(ierr);
2152   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec0b);CHKERRQ(ierr);
2153   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1a);CHKERRQ(ierr);
2154   ierr = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->slvec1b);CHKERRQ(ierr);
2155 
2156   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2157   ierr      = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2158   a->sMvctx = oldmat->sMvctx;
2159   ierr      = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->sMvctx);CHKERRQ(ierr);
2160 
2161   ierr    =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2162   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->A);CHKERRQ(ierr);
2163   ierr    =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2164   ierr    = PetscLogObjectParent((PetscObject)mat,(PetscObject)a->B);CHKERRQ(ierr);
2165   ierr    = PetscFunctionListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2166   *newmat = mat;
2167   PetscFunctionReturn(0);
2168 }
2169 
2170 #undef __FUNCT__
2171 #define __FUNCT__ "MatLoad_MPISBAIJ"
2172 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2173 {
2174   PetscErrorCode ierr;
2175   PetscInt       i,nz,j,rstart,rend;
2176   PetscScalar    *vals,*buf;
2177   MPI_Comm       comm;
2178   MPI_Status     status;
2179   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,mmbs;
2180   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols,*locrowlens;
2181   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2182   PetscInt       bs       =1,Mbs,mbs,extra_rows;
2183   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2184   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2185   int            fd;
2186 
2187   PetscFunctionBegin;
2188   ierr = PetscObjectGetComm((PetscObject)viewer,&comm);CHKERRQ(ierr);
2189   ierr = PetscOptionsBegin(comm,NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2190   ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,NULL);CHKERRQ(ierr);
2191   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2192 
2193   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2194   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2195   if (!rank) {
2196     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2197     ierr = PetscBinaryRead(fd,(char*)header,4,PETSC_INT);CHKERRQ(ierr);
2198     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2199     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2200   }
2201 
2202   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2203 
2204   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2205   M    = header[1];
2206   N    = header[2];
2207 
2208   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2209   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2210   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2211 
2212   /* If global sizes are set, check if they are consistent with that given in the file */
2213   if (sizesset) {
2214     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2215   }
2216   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: matrix in file has (%D) and input matrix has (%D)",M,grows);
2217   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: matrix in file has (%D) and input matrix has (%D)",N,gcols);
2218 
2219   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2220 
2221   /*
2222      This code adds extra rows to make sure the number of rows is
2223      divisible by the blocksize
2224   */
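  /* e.g. (illustrative): M=10, bs=4 gives Mbs=3 and extra_rows=2, padding the matrix to 12 rows */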
2225   Mbs        = M/bs;
2226   extra_rows = bs - M + bs*(Mbs);
2227   if (extra_rows == bs) extra_rows = 0;
2228   else                  Mbs++;
2229   if (extra_rows && !rank) {
2230     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2231   }
2232 
2233   /* determine ownership of all rows */
2234   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2235     mbs = Mbs/size + ((Mbs % size) > rank);
2236     m   = mbs*bs;
2237   } else { /* User Set */
2238     m   = newmat->rmap->n;
2239     mbs = m/bs;
2240   }
2241   ierr       = PetscMalloc2(size+1,&rowners,size+1,&browners);CHKERRQ(ierr);
2242   ierr       = PetscMPIIntCast(mbs,&mmbs);CHKERRQ(ierr);
2243   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2244   rowners[0] = 0;
2245   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2246   for (i=0; i<=size; i++) browners[i] = rowners[i]*bs;
2247   rstart = rowners[rank];
2248   rend   = rowners[rank+1];
2249 
2250   /* distribute row lengths to all processors */
2251   ierr = PetscMalloc1((rend-rstart)*bs,&locrowlens);CHKERRQ(ierr);
2252   if (!rank) {
2253     ierr = PetscMalloc1((M+extra_rows),&rowlengths);CHKERRQ(ierr);
2254     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2255     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2256     ierr = PetscMalloc1(size,&sndcounts);CHKERRQ(ierr);
2257     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2258     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2259     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2260   } else {
2261     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2262   }
2263 
2264   if (!rank) {   /* procs[0] */
2265     /* calculate the number of nonzeros on each processor */
2266     ierr = PetscMalloc1(size,&procsnz);CHKERRQ(ierr);
2267     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2268     for (i=0; i<size; i++) {
2269       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2270         procsnz[i] += rowlengths[j];
2271       }
2272     }
2273     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2274 
2275     /* determine max buffer needed and allocate it */
2276     maxnz = 0;
2277     for (i=0; i<size; i++) {
2278       maxnz = PetscMax(maxnz,procsnz[i]);
2279     }
2280     ierr = PetscMalloc1(maxnz,&cols);CHKERRQ(ierr);
2281 
2282     /* read in my part of the matrix column indices  */
2283     nz     = procsnz[0];
2284     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2285     mycols = ibuf;
2286     if (size == 1) nz -= extra_rows;
2287     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2288     if (size == 1) {
2289       for (i=0; i< extra_rows; i++) mycols[nz+i] = M+i;
2290     }
2291 
2292     /* read in the parts owned by all other processors (except the last) and ship them off */
2293     for (i=1; i<size-1; i++) {
2294       nz   = procsnz[i];
2295       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2296       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2297     }
2298     /* read in the stuff for the last proc */
2299     if (size != 1) {
2300       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2301       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2302       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2303       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2304     }
2305     ierr = PetscFree(cols);CHKERRQ(ierr);
2306   } else {  /* procs[i], i>0 */
2307     /* determine buffer space needed for message */
2308     nz = 0;
2309     for (i=0; i<m; i++) nz += locrowlens[i];
2310     ierr   = PetscMalloc1(nz,&ibuf);CHKERRQ(ierr);
2311     mycols = ibuf;
2312     /* receive message of column indices*/
2313     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2314     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2315     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2316   }
2317 
2318   /* loop over local rows, determining number of off diagonal entries */
2319   ierr     = PetscMalloc2(rend-rstart,&dlens,rend-rstart,&odlens);CHKERRQ(ierr);
2320   ierr     = PetscMalloc3(Mbs,&mask,Mbs,&masked1,Mbs,&masked2);CHKERRQ(ierr);
2321   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2322   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2323   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2324   rowcount = 0;
2325   nzcount  = 0;
2326   for (i=0; i<mbs; i++) {
2327     dcount  = 0;
2328     odcount = 0;
2329     for (j=0; j<bs; j++) {
2330       kmax = locrowlens[rowcount];
2331       for (k=0; k<kmax; k++) {
2332         tmp = mycols[nzcount++]/bs; /* block col. index */
2333         if (!mask[tmp]) {
2334           mask[tmp] = 1;
2335           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2336           else masked1[dcount++] = tmp; /* entry in diag portion */
2337         }
2338       }
2339       rowcount++;
2340     }
2341 
2342     dlens[i]  = dcount;  /* d_nzz[i] */
2343     odlens[i] = odcount; /* o_nzz[i] */
2344 
2345     /* zero out the mask elements we set */
2346     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2347     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2348   }
2349   if (!sizesset) {
2350     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2351   }
2352   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2353   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2354 
2355   if (!rank) {
2356     ierr = PetscMalloc1(maxnz,&buf);CHKERRQ(ierr);
2357     /* read in my part of the matrix numerical values  */
2358     nz     = procsnz[0];
2359     vals   = buf;
2360     mycols = ibuf;
2361     if (size == 1) nz -= extra_rows;
2362     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2363     if (size == 1) {
2364       for (i=0; i< extra_rows; i++) vals[nz+i] = 1.0;
2365     }
2366 
2367     /* insert into matrix */
2368     jj = rstart*bs;
2369     for (i=0; i<m; i++) {
2370       ierr    = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2371       mycols += locrowlens[i];
2372       vals   += locrowlens[i];
2373       jj++;
2374     }
2375 
2376     /* read in other processors (except the last one) and ship out */
2377     for (i=1; i<size-1; i++) {
2378       nz   = procsnz[i];
2379       vals = buf;
2380       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2381       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2382     }
2383     /* the last proc */
2384     if (size != 1) {
2385       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2386       vals = buf;
2387       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2388       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2389       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2390     }
2391     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2392 
2393   } else {
2394     /* receive numeric values */
2395     ierr = PetscMalloc1(nz,&buf);CHKERRQ(ierr);
2396 
2397     /* receive message of values*/
2398     vals   = buf;
2399     mycols = ibuf;
2400     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2401     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2402     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2403 
2404     /* insert into matrix */
2405     jj = rstart*bs;
2406     for (i=0; i<m; i++) {
2407       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2408       mycols += locrowlens[i];
2409       vals   += locrowlens[i];
2410       jj++;
2411     }
2412   }
2413 
2414   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2415   ierr = PetscFree(buf);CHKERRQ(ierr);
2416   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2417   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2418   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2419   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2420   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2421   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2422   PetscFunctionReturn(0);
2423 }
2424 
2425 #undef __FUNCT__
2426 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2427 /*XXXXX@
2428    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2429 
2430    Input Parameters:
2431 .  mat  - the matrix
2432 .  fact - factor
2433 
2434    Not Collective on Mat, each process can have a different hash factor
2435 
2436    Level: advanced
2437 
2438   Notes:
2439    This can also be set by the command line option: -mat_use_hash_table fact
2440 
2441 .keywords: matrix, hashtable, factor, HT
2442 
2443 .seealso: MatSetOption()
2444 @XXXXX*/
2445 
2446 
2447 #undef __FUNCT__
2448 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2449 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2450 {
2451   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2452   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2453   PetscReal      atmp;
2454   PetscReal      *work,*svalues,*rvalues;
2455   PetscErrorCode ierr;
2456   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2457   PetscMPIInt    rank,size;
2458   PetscInt       *rowners_bs,dest,count,source;
2459   PetscScalar    *va;
2460   MatScalar      *ba;
2461   MPI_Status     stat;
2462 
2463   PetscFunctionBegin;
2464   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2465   ierr = MatGetRowMaxAbs(a->A,v,NULL);CHKERRQ(ierr);
2466   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2467 
2468   ierr = MPI_Comm_size(PetscObjectComm((PetscObject)A),&size);CHKERRQ(ierr);
2469   ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr);
2470 
2471   bs  = A->rmap->bs;
2472   mbs = a->mbs;
2473   Mbs = a->Mbs;
2474   ba  = b->a;
2475   bi  = b->i;
2476   bj  = b->j;
2477 
2478   /* find ownerships */
2479   rowners_bs = A->rmap->range;
2480 
2481   /* each proc creates an array to be distributed */
2482   ierr = PetscMalloc1(bs*Mbs,&work);CHKERRQ(ierr);
2483   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2484 
2485   /* row_max for B */
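  /* only the upper triangle is stored, so each entry of B contributes to a locally
     owned row and to a column owned by a higher rank; the partial column maxima
     accumulated in work[] are shipped forward to their owners below */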
2486   if (rank != size-1) {
2487     for (i=0; i<mbs; i++) {
2488       ncols = bi[1] - bi[0]; bi++;
2489       brow  = bs*i;
2490       for (j=0; j<ncols; j++) {
2491         bcol = bs*(*bj);
2492         for (kcol=0; kcol<bs; kcol++) {
2493           col  = bcol + kcol;                /* local col index */
2494           col += rowners_bs[rank+1];      /* global col index */
2495           for (krow=0; krow<bs; krow++) {
2496             atmp = PetscAbsScalar(*ba); ba++;
2497             row  = brow + krow;   /* local row index */
2498             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2499             if (work[col] < atmp) work[col] = atmp;
2500           }
2501         }
2502         bj++;
2503       }
2504     }
2505 
2506     /* send values to its owners */
2507     for (dest=rank+1; dest<size; dest++) {
2508       svalues = work + rowners_bs[dest];
2509       count   = rowners_bs[dest+1]-rowners_bs[dest];
2510       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PetscObjectComm((PetscObject)A));CHKERRQ(ierr);
2511     }
2512   }
2513 
2514   /* receive values */
2515   if (rank) {
2516     rvalues = work;
2517     count   = rowners_bs[rank+1]-rowners_bs[rank];
2518     for (source=0; source<rank; source++) {
2519       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PetscObjectComm((PetscObject)A),&stat);CHKERRQ(ierr);
2520       /* process values */
2521       for (i=0; i<count; i++) {
2522         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2523       }
2524     }
2525   }
2526 
2527   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2528   ierr = PetscFree(work);CHKERRQ(ierr);
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 #undef __FUNCT__
2533 #define __FUNCT__ "MatSOR_MPISBAIJ"
2534 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2535 {
2536   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2537   PetscErrorCode    ierr;
2538   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2539   PetscScalar       *x,*ptr,*from;
2540   Vec               bb1;
2541   const PetscScalar *b;
2542 
2543   PetscFunctionBegin;
2544   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2545   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2546 
2547   if (flag == SOR_APPLY_UPPER) {
2548     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2549     PetscFunctionReturn(0);
2550   }
2551 
2552   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
2553     if (flag & SOR_ZERO_INITIAL_GUESS) {
2554       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2555       its--;
2556     }
2557 
2558     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
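    /* each sweep computes bb1 = bb - B*x - B^T*x (the off-diagonal block applied to both
       triangles) using one forward scatter on slvec0/slvec1, then applies a local
       SOR_SYMMETRIC_SWEEP with the diagonal block A */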
2559     while (its--) {
2560 
2561       /* lower triangular part: slvec0b = - B^T*xx */
2562       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2563 
2564       /* copy xx into slvec0a */
2565       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2566       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2567       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2568       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2569 
2570       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2571 
2572       /* copy bb into slvec1a */
2573       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2574       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2575       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2576       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2577 
2578       /* set slvec1b = 0 */
2579       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2580 
2581       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2582       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2583       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2584       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2585 
2586       /* upper triangular part: bb1 = bb1 - B*x */
2587       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2588 
2589       /* local diagonal sweep */
2590       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2591     }
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)) {
    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
  } else if (flag & SOR_EISENSTAT) {
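    /*
       Eisenstat's trick: apply the SSOR preconditioner while multiplying by
       the off-diagonal part B only once per application.  The factor
       scale = (omega-2.0)/omega below combines the diagonal terms of the
       backward and forward triangular solves so that a separate multiplication
       by the full matrix is avoided.
    */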
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* stale if the same matrix is reused with new numerical values */
      ierr = MatGetVecs(matin,&mat->diag,NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      /* the diagonal-block product must land in slvec1a, which the AYPX and the multadd below operate on */
      ierr = MatMultDiagonalBlock(matin,xx,mat->slvec1a);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
          The following two lines are replaced by hand-written code that may be a bit faster with a good compiler:
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
      if (omega == 1.0) {
        for (i=0; i<n; i++) sl[i] = b[i] - diag[i]*x[i];
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) sl[i] = b[i] + scale*diag[i]*x[i];
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}
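
/*
   Usage sketch (illustrative only, not part of the library source): apply two
   local symmetric SOR sweeps with a zero initial guess to an assembled
   MPISBAIJ matrix A.  The vectors b and x are assumed to have been created
   with layouts compatible with A.

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   More commonly this path is exercised through the SOR preconditioner, e.g.
   with the runtime options -pc_type sor -pc_sor_local_symmetric.
*/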

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
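
  /*
     Variant of MatSOR_MPISBAIJ that uses the standard Mvctx scatter twice per
     iteration (a forward scatter for the ghost values of xx and a reverse
     scatter to return the B^T*xx contributions) instead of the single merged
     slvec0/slvec1 scatter above; hence, presumably, the _2comm suffix.
  */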

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*xx_ghost */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
         in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size (only a block size of 1 is currently supported)
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given); for square matrices n is almost always m
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.   i - row indices
.   j - column indices
-   a - matrix values

   Output Parameter:
.   mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.

       The i and j indices are 0 based, and the entries of i are offsets into the local j array.

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
          MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
@*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
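
/*
   Usage sketch (illustrative only, not part of the library source): the
   process owning global rows 0 and 1 of a symmetric tridiagonal matrix
   passes only the diagonal and upper triangular entries of its rows; the
   values shown are arbitrary.

     PetscInt    ii[] = {0,2,4};              row offsets for 2 local rows
     PetscInt    jj[] = {0,1,1,2};            global column indices, sorted per row
     PetscScalar aa[] = {4.0,-1.0,4.0,-1.0};  matrix values
     Mat         A;

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&A);CHKERRQ(ierr);
*/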

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format,
   given a description of the local rows in CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted within each row
-  v - optional values in the matrix

   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
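
/*
   Usage sketch (illustrative only, not part of the library source): equivalent
   to MatCreateMPISBAIJWithArrays(), but allows the type and options to be set
   before preallocation.

     ierr = MatCreate(comm,&B);CHKERRQ(ierr);
     ierr = MatSetSizes(B,m,n,M,N);CHKERRQ(ierr);
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatSetFromOptions(B);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,i,j,v);CHKERRQ(ierr);
*/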