/* src/mat/impls/sbaij/mpi/mpisbaij.c (revision 1f0e42cf17e8fdc56355134e13a5edcdb6dc1c19) */
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode MatDisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
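/* MatStoreValues/MatRetrieveValues simply forward to the sequential on-diagonal (A)
   and off-diagonal (B) blocks, letting a caller save the numerical values of the
   matrix and restore them later while the nonzero pattern stays fixed. */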
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
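/* The two macros below insert one scalar entry into the local storage of the
   on-diagonal SeqSBAIJ block (A, upper triangle only) or the off-diagonal SeqBAIJ
   block (B).  Each locates the target block column with a short binary search plus
   a linear scan, adds or overwrites the value inside the bs x bs block, and falls
   back to MatSeqXAIJReallocateAIJ() when a new block must be created and the row
   has no free slots.  They use local variables (rp, ap, nrow, rmax, low, high, ...)
   declared in MatSetValues_MPISBAIJ(). */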
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
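/* Illustrative usage (sketch, not taken from this file; the indices are made up):
   callers go through the generic interface, e.g.

       PetscInt    row = 3, col = 5;       (global point indices with row <= col)
       PetscScalar val = 1.0;
       ierr = MatSetValues(mat,1,&row,1,&col,&val,ADD_VALUES);CHKERRQ(ierr);

   Entries whose block row exceeds the block column are silently dropped when
   MAT_IGNORE_LOWER_TRIANGULAR is set (or -mat_ignore_lower_triangular is given);
   otherwise they produce the error raised below. */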
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required in the macros below */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         mat->assembled = PETSC_FALSE;
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
253 
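/* Blocked insertion: im[] and in[] are block (not point) indices and v holds dense
   bs x bs blocks.  Locally owned blocks go straight into baij->A (upper triangle)
   or baij->B; off-process blocks are stashed in mat->bstash and exchanged during
   assembly.  When the input is strided, the block is first gathered into the
   contiguous work buffer barray. */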
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if(!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         if (im[i] > in[j]) {
287           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
288           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
289         }
290         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
291         if ((roworiented) && (n == 1)) {
292           barray = (MatScalar*) v + i*bs2;
293         } else if((!roworiented) && (m == 1)) {
294           barray = (MatScalar*) v + j*bs2;
295         } else { /* Here a copy is required */
296           if (roworiented) {
297             value = v + i*(stepval+bs)*bs + j*bs;
298           } else {
299             value = v + j*(stepval+bs)*bs + i*bs;
300           }
301           for (ii=0; ii<bs; ii++,value+=stepval) {
302             for (jj=0; jj<bs; jj++) {
303               *barray++  = *value++;
304             }
305           }
306           barray -=bs2;
307         }
308 
309         if (in[j] >= cstart && in[j] < cend){
310           col  = in[j] - cstart;
311           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
312         }
313         else if (in[j] < 0) continue;
314 #if defined(PETSC_USE_DEBUG)
315         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
316 #endif
317         else {
318           if (mat->was_assembled) {
319             if (!baij->colmap) {
320               ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
321             }
322 
323 #if defined(PETSC_USE_DEBUG)
324 #if defined (PETSC_USE_CTABLE)
325             { PetscInt data;
326               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
327               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
328             }
329 #else
330             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
331 #endif
332 #endif
333 #if defined (PETSC_USE_CTABLE)
334 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
335             col  = (col - 1)/bs;
336 #else
337             col = (baij->colmap[in[j]] - 1)/bs;
338 #endif
339             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
340               ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
341               col =  in[j];
342             }
343           }
344           else col = in[j];
345           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
346         }
347       }
348     } else {
349       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
350       if (!baij->donotstash) {
351         if (roworiented) {
352           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
353         } else {
354           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
355         }
356       }
357     }
358   }
359   PetscFunctionReturn(0);
360 }
361 
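/* Only locally owned rows may be queried.  Columns that fall in the diagonal block
   are read from baij->A; other columns are translated through colmap/garray and
   read from baij->B, and entries that are not stored locally are returned as 0. */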
362 #undef __FUNCT__
363 #define __FUNCT__ "MatGetValues_MPISBAIJ"
364 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
365 {
366   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
367   PetscErrorCode ierr;
368   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
369   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
370 
371   PetscFunctionBegin;
372   for (i=0; i<m; i++) {
373     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
374     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
375     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
376       row = idxm[i] - bsrstart;
377       for (j=0; j<n; j++) {
378         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
379         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
380         if (idxn[j] >= bscstart && idxn[j] < bscend){
381           col = idxn[j] - bscstart;
382           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
383         } else {
384           if (!baij->colmap) {
385             ierr = MatCreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
386           }
387 #if defined (PETSC_USE_CTABLE)
388           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
389           data --;
390 #else
391           data = baij->colmap[idxn[j]/bs]-1;
392 #endif
393           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
394           else {
395             col  = data + idxn[j]%bs;
396             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
397           }
398         }
399       }
400     } else {
401       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
402     }
403   }
404  PetscFunctionReturn(0);
405 }
406 
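/* For NORM_FROBENIUS each process computes the squared norms of its stored parts
   and the result is

       norm = sqrt( sum_p ||A_p||_F^2 + 2 * sum_p ||B_p||_F^2 ),

   the factor 2 accounting for the off-diagonal blocks B that appear twice in the
   symmetric matrix.  For NORM_1/NORM_INFINITY (identical here by symmetry) each
   stored value is added to its global column sum, and values outside the diagonal
   block also to their row sum so the implicit lower triangle is counted, followed
   by an Allreduce and a maximum over the columns. */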
407 #undef __FUNCT__
408 #define __FUNCT__ "MatNorm_MPISBAIJ"
409 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
410 {
411   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
412   PetscErrorCode ierr;
413   PetscReal      sum[2],*lnorm2;
414 
415   PetscFunctionBegin;
416   if (baij->size == 1) {
417     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
418   } else {
419     if (type == NORM_FROBENIUS) {
420       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
421       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
422       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
423       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
424       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
425       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
426       *norm = PetscSqrtReal(sum[0] + 2*sum[1]);
427       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
428     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
429       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
430       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
431       PetscReal    *rsum,*rsum2,vabs;
432       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
433       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
434       MatScalar    *v;
435 
436       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
437       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
438       /* Amat */
439       v = amat->a; jj = amat->j;
440       for (brow=0; brow<mbs; brow++) {
441         grow = bs*(rstart + brow);
442         nz = amat->i[brow+1] - amat->i[brow];
443         for (bcol=0; bcol<nz; bcol++){
444           gcol = bs*(rstart + *jj); jj++;
445           for (col=0; col<bs; col++){
446             for (row=0; row<bs; row++){
447               vabs = PetscAbsScalar(*v); v++;
448               rsum[gcol+col] += vabs;
449               /* non-diagonal block */
450               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
451             }
452           }
453         }
454       }
455       /* Bmat */
456       v = bmat->a; jj = bmat->j;
457       for (brow=0; brow<mbs; brow++) {
458         grow = bs*(rstart + brow);
459         nz = bmat->i[brow+1] - bmat->i[brow];
460         for (bcol=0; bcol<nz; bcol++){
461           gcol = bs*garray[*jj]; jj++;
462           for (col=0; col<bs; col++){
463             for (row=0; row<bs; row++){
464               vabs = PetscAbsScalar(*v); v++;
465               rsum[gcol+col] += vabs;
466               rsum[grow+row] += vabs;
467             }
468           }
469         }
470       }
471       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
472       *norm = 0.0;
473       for (col=0; col<mat->cmap->N; col++) {
474         if (rsum2[col] > *norm) *norm = rsum2[col];
475       }
476       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
477     } else {
478       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
479     }
480   }
481   PetscFunctionReturn(0);
482 }
483 
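/* MatAssemblyBegin checks that every process uses a consistent InsertMode and
   starts communicating the stashed off-process entries (both the scalar stash and
   the block stash). */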
484 #undef __FUNCT__
485 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
486 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
487 {
488   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
489   PetscErrorCode ierr;
490   PetscInt       nstash,reallocs;
491   InsertMode     addv;
492 
493   PetscFunctionBegin;
494   if (baij->donotstash || mat->nooffprocentries) {
495     PetscFunctionReturn(0);
496   }
497 
498   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
499   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
500   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
501   mat->insertmode = addv; /* in case this processor had no cache */
502 
503   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
504   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
508   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
509   PetscFunctionReturn(0);
510 }
511 
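/* MatAssemblyEnd drains both stashes, re-inserting received values through
   MatSetValues_MPISBAIJ() and MatSetValuesBlocked_MPISBAIJ(), assembles the
   sequential A and B blocks, and on the first final assembly builds the scatter
   contexts (Mvctx and sMvctx) used by the matrix-vector products. */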
512 #undef __FUNCT__
513 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
514 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
515 {
516   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
517   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
518   PetscErrorCode ierr;
519   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
520   PetscInt       *row,*col;
521   PetscBool      other_disassembled;
522   PetscMPIInt    n;
523   PetscBool      r1,r2,r3;
524   MatScalar      *val;
525   InsertMode     addv = mat->insertmode;
526 
527   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
528   PetscFunctionBegin;
529 
530   if (!baij->donotstash &&  !mat->nooffprocentries) {
531     while (1) {
532       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
533       if (!flg) break;
534 
535       for (i=0; i<n;) {
536         /* Now identify the consecutive vals belonging to the same row */
537         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
538         if (j < n) ncols = j-i;
539         else       ncols = n-i;
540         /* Now assemble all these values with a single function call */
541         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
542         i = j;
543       }
544     }
545     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
546     /* Now process the block-stash. Since the values are stashed column-oriented,
547        temporarily clear the roworiented flags, and restore the original
548        flags after the stashed values have been inserted */
549     r1 = baij->roworiented;
550     r2 = a->roworiented;
551     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
552     baij->roworiented = PETSC_FALSE;
553     a->roworiented    = PETSC_FALSE;
554     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
555     while (1) {
556       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
557       if (!flg) break;
558 
559       for (i=0; i<n;) {
560         /* Now identify the consecutive vals belonging to the same row */
561         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
562         if (j < n) ncols = j-i;
563         else       ncols = n-i;
564         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
565         i = j;
566       }
567     }
568     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
569     baij->roworiented = r1;
570     a->roworiented    = r2;
571     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
572   }
573 
574   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
575   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
576 
577   /* determine if any processor has disassembled, if so we must
578      also disassemble ourselves, in order that we may reassemble. */
579   /*
580      if nonzero structure of submatrix B cannot change then we know that
581      no processor disassembled thus we can skip this stuff
582   */
583   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
584     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
585     if (mat->was_assembled && !other_disassembled) {
586       ierr = MatDisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
587     }
588   }
589 
590   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
591     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
592   }
593   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
594   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
595   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
596 
597   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
598   baij->rowvalues = 0;
599 
600   PetscFunctionReturn(0);
601 }
602 
603 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
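/* ASCII INFO formats print per-process nonzero statistics; for a full view on more
   than one process the matrix is gathered onto process 0 into a temporary
   MATMPISBAIJ and viewed there through the singleton viewer. */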
604 #undef __FUNCT__
605 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
606 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
607 {
608   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode    ierr;
610   PetscInt          bs = mat->rmap->bs;
611   PetscMPIInt       size = baij->size,rank = baij->rank;
612   PetscBool         iascii,isdraw;
613   PetscViewer       sviewer;
614   PetscViewerFormat format;
615 
616   PetscFunctionBegin;
617   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
618   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
619   if (iascii) {
620     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
621     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
622       MatInfo info;
623       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
624       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
626       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
627       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
628       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
629       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
630       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
631       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
632       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
633       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
634       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
635       PetscFunctionReturn(0);
636     } else if (format == PETSC_VIEWER_ASCII_INFO) {
637       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
638       PetscFunctionReturn(0);
639     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
640       PetscFunctionReturn(0);
641     }
642   }
643 
644   if (isdraw) {
645     PetscDraw  draw;
646     PetscBool  isnull;
647     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
648     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
649   }
650 
651   if (size == 1) {
652     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
653     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
654   } else {
655     /* assemble the entire matrix onto first processor. */
656     Mat          A;
657     Mat_SeqSBAIJ *Aloc;
658     Mat_SeqBAIJ  *Bloc;
659     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
660     MatScalar    *a;
661 
662     /* Should this be the same type as mat? */
663     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
664     if (!rank) {
665       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
666     } else {
667       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
668     }
669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
670     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
671     ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr);
672     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
673 
674     /* copy over the A part */
675     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
676     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
677     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
678 
679     for (i=0; i<mbs; i++) {
680       rvals[0] = bs*(baij->rstartbs + i);
681       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
682       for (j=ai[i]; j<ai[i+1]; j++) {
683         col = (baij->cstartbs+aj[j])*bs;
684         for (k=0; k<bs; k++) {
685           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
686           col++; a += bs;
687         }
688       }
689     }
690     /* copy over the B part */
691     Bloc = (Mat_SeqBAIJ*)baij->B->data;
692     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
693     for (i=0; i<mbs; i++) {
694 
695       rvals[0] = bs*(baij->rstartbs + i);
696       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
697       for (j=ai[i]; j<ai[i+1]; j++) {
698         col = baij->garray[aj[j]]*bs;
699         for (k=0; k<bs; k++) {
700           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
701           col++; a += bs;
702         }
703       }
704     }
705     ierr = PetscFree(rvals);CHKERRQ(ierr);
706     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
707     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
708     /*
709        Everyone has to call to draw the matrix since the graphics waits are
710        synchronized across all processors that share the PetscDraw object
711     */
712     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
713     if (!rank) {
714       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
715           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
716       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
717       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
718     }
719     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
720     ierr = MatDestroy(&A);CHKERRQ(ierr);
721   }
722   PetscFunctionReturn(0);
723 }
724 
725 #undef __FUNCT__
726 #define __FUNCT__ "MatView_MPISBAIJ"
727 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
728 {
729   PetscErrorCode ierr;
730   PetscBool      iascii,isdraw,issocket,isbinary;
731 
732   PetscFunctionBegin;
733   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
734   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
735   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
736   ierr = PetscObjectTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
737   if (iascii || isdraw || issocket || isbinary) {
738     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
739   } else {
740     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
741   }
742   PetscFunctionReturn(0);
743 }
744 
745 #undef __FUNCT__
746 #define __FUNCT__ "MatDestroy_MPISBAIJ"
747 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
748 {
749   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
750   PetscErrorCode ierr;
751 
752   PetscFunctionBegin;
753 #if defined(PETSC_USE_LOG)
754   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
755 #endif
756   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
757   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
758   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
759   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
760 #if defined (PETSC_USE_CTABLE)
761   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
762 #else
763   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
764 #endif
765   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
767   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
771   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
772   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
773   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
774   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
775   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
776   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
778   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
779   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
780 #if defined(PETSC_USE_REAL_MAT_SINGLE)
781   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
782 #endif
783   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
784   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
786   ierr = PetscFree(mat->data);CHKERRQ(ierr);
787 
788   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
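/* Matrix-vector product y = M*x for the symmetric format using a single scatter:
     slvec1a = A*x                      (local upper-triangular diagonal block)
     slvec0  = [ x ; B^T*x ]
     slvec1  = scatter-add of slvec0    (collects remote contributions)
     y       = B*slvec1b + slvec1a
   so the unstored lower off-diagonal blocks are applied through B^T.  The
   Hermitian and plain variants below differ only in using B^H or B^T. */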
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* superdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar    *x,*from;
840 
841   PetscFunctionBegin;
842   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
843   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844 
845   /* diagonal part */
846   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
847   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
848 
849   /* subdiagonal part */
850   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
851 
852   /* copy x into the vec slvec0 */
853   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
857   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   /* superdiagonal part */
863   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
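/* Two-communication variant: y = A*x + B*x_ghost, followed by a reverse
   scatter-add of B^T*x so that the unstored lower off-diagonal blocks also
   contribute to the rows owned by other processes. */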
867 #undef __FUNCT__
868 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
869 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870 {
871   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
872   PetscErrorCode ierr;
873   PetscInt       nt;
874 
875   PetscFunctionBegin;
876   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
877   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
878 
879   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
880   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
881 
882   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   /* do diagonal part */
884   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
885   /* do superdiagonal part */
886   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
888   /* do subdiagonal part */
889   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
890   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892 
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(((PetscObject)A)->comm);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* superdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929 
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
935 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   /* do diagonal part */
943   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
944   /* do superdiagonal part */
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
947 
948   /* do subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952 
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock A->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
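/* Returns one locally owned row with global column indices: the row of the
   off-diagonal part B is mapped through garray and merged in increasing column
   order with the row of the diagonal part A (of which only the stored upper
   triangle is visited).  Scratch space (rowvalues/rowindices) is sized once for
   the longest local row. */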
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) { max = tmp; }
1011     }
1012     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
1025   cmap  = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart)
1049               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1050             else break;
1051           }
1052           imark = i;
1053         }
1054         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1055         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1056       }
1057     } else {
1058       if (idx) *idx = 0;
1059       if (v)   *v   = 0;
1060     }
1061   }
1062   *nz = nztot;
1063   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1064   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1076   baij->getrowactive = PETSC_FALSE;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1082 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1083 {
1084   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1085   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1086 
1087   PetscFunctionBegin;
1088   aA->getrow_utriangular = PETSC_TRUE;
1089   PetscFunctionReturn(0);
1090 }
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1093 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1097 
1098   PetscFunctionBegin;
1099   aA->getrow_utriangular = PETSC_FALSE;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1105 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1106 {
1107   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1112   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1118 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1119 {
1120   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1125   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1131 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1132 {
1133   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1138   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1144 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1147   Mat            A = a->A,B = a->B;
1148   PetscErrorCode ierr;
1149   PetscReal      isend[5],irecv[5];
1150 
1151   PetscFunctionBegin;
1152   info->block_size     = (PetscReal)matin->rmap->bs;
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   } else if (flag == MAT_GLOBAL_SUM) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else {
1180     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1181   }
1182   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1183   info->fill_ratio_needed = 0;
1184   info->factor_mallocs    = 0;
1185   PetscFunctionReturn(0);
1186 }
1187 
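/* Most options are simply forwarded to the sequential blocks.  MAT_HERMITIAN
   switches the multiply to the Hermitian variant, MAT_SPD also marks the matrix
   as (structurally) symmetric, and MAT_IGNORE_LOWER_TRIANGULAR /
   MAT_GETROW_UPPERTRIANGULAR toggle flags on the on-diagonal SeqSBAIJ block. */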
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1190 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1191 {
1192   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1193   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   switch (op) {
1198   case MAT_NEW_NONZERO_LOCATIONS:
1199   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201   case MAT_KEEP_NONZERO_PATTERN:
1202   case MAT_NEW_NONZERO_LOCATION_ERR:
1203     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = flg;
1208     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1210     break;
1211   case MAT_NEW_DIAGONALS:
1212     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1213     break;
1214   case MAT_IGNORE_OFF_PROC_ENTRIES:
1215     a->donotstash = flg;
1216     break;
1217   case MAT_USE_HASH_TABLE:
1218     a->ht_flag = flg;
1219     break;
1220   case MAT_HERMITIAN:
1221     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1224     break;
1225   case MAT_SPD:
1226     A->spd_set                         = PETSC_TRUE;
1227     A->spd                             = flg;
1228     if (flg) {
1229       A->symmetric                     = PETSC_TRUE;
1230       A->structurally_symmetric        = PETSC_TRUE;
1231       A->symmetric_set                 = PETSC_TRUE;
1232       A->structurally_symmetric_set    = PETSC_TRUE;
1233     }
1234     break;
1235   case MAT_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_STRUCTURALLY_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_SYMMETRY_ETERNAL:
1242     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1243     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1244     break;
1245   case MAT_IGNORE_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_ERROR_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_GETROW_UPPERTRIANGULAR:
1252     aA->getrow_utriangular = flg;
1253     break;
1254   default:
1255     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265   PetscFunctionBegin;
1266   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
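/* Symmetric diagonal scaling: to keep the matrix symmetric the left and right
   scaling vectors must be identical, giving

       M <- diag(l) * M * diag(l)

   implemented as a two-sided scale of the diagonal block A, a left scale of B by
   the local part of l, and a right scale of B by the scattered ghost values of l. */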
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1274 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1275 {
1276   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1277   Mat            a=baij->A, b=baij->B;
1278   PetscErrorCode ierr;
1279   PetscInt       nv,m,n;
1280   PetscBool      flg;
1281 
1282   PetscFunctionBegin;
1283   if (ll != rr){
1284     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1285     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1286   }
1287   if (!ll) PetscFunctionReturn(0);
1288 
1289   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1290   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1291 
1292   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1293   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1294 
1295   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296 
1297   /* left diagonalscale the off-diagonal part */
1298   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1299 
1300   /* scale the diagonal part */
1301   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1302 
1303   /* right diagonalscale the off-diagonal part */
1304   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1311 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatEqual_MPISBAIJ"
1325 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1326 {
1327   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1328   Mat            a,b,c,d;
1329   PetscBool      flg;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   a = matA->A; b = matA->B;
1334   c = matB->A; d = matB->B;
1335 
1336   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1337   if (flg) {
1338     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1339   }
1340   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCopy_MPISBAIJ"
1346 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1347 {
1348   PetscErrorCode ierr;
1349   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1350   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1351 
1352   PetscFunctionBegin;
1353   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1354   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1355     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1356     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1357     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1358   } else {
1359     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1360     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetUp_MPISBAIJ"
1367 PetscErrorCode MatSetUp_MPISBAIJ(Mat A)
1368 {
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatMPISBAIJSetPreallocation(A,A->rmap->bs,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
1388     PetscScalar alpha = a;
1389     xa = (Mat_SeqSBAIJ *)xx->A->data;
1390     ya = (Mat_SeqSBAIJ *)yy->A->data;
1391     bnz = PetscBLASIntCast(xa->nz);
1392     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1393     xb = (Mat_SeqBAIJ *)xx->B->data;
1394     yb = (Mat_SeqBAIJ *)yy->B->data;
1395     bnz = PetscBLASIntCast(xb->nz);
1396     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1397   } else {
1398     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1407 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1408 {
1409   PetscErrorCode ierr;
1410   PetscInt       i;
1411   PetscBool      flg;
1412 
1413   PetscFunctionBegin;
1414   for (i=0; i<n; i++) {
1415     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1416     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1417   }
1418   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 
1423 /* -------------------------------------------------------------------*/
1424 static struct _MatOps MatOps_Values = {
1425        MatSetValues_MPISBAIJ,
1426        MatGetRow_MPISBAIJ,
1427        MatRestoreRow_MPISBAIJ,
1428        MatMult_MPISBAIJ,
1429 /* 4*/ MatMultAdd_MPISBAIJ,
1430        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1431        MatMultAdd_MPISBAIJ,
1432        0,
1433        0,
1434        0,
1435 /*10*/ 0,
1436        0,
1437        0,
1438        MatSOR_MPISBAIJ,
1439        MatTranspose_MPISBAIJ,
1440 /*15*/ MatGetInfo_MPISBAIJ,
1441        MatEqual_MPISBAIJ,
1442        MatGetDiagonal_MPISBAIJ,
1443        MatDiagonalScale_MPISBAIJ,
1444        MatNorm_MPISBAIJ,
1445 /*20*/ MatAssemblyBegin_MPISBAIJ,
1446        MatAssemblyEnd_MPISBAIJ,
1447        MatSetOption_MPISBAIJ,
1448        MatZeroEntries_MPISBAIJ,
1449 /*24*/ 0,
1450        0,
1451        0,
1452        0,
1453        0,
1454 /*29*/ MatSetUp_MPISBAIJ,
1455        0,
1456        0,
1457        0,
1458        0,
1459 /*34*/ MatDuplicate_MPISBAIJ,
1460        0,
1461        0,
1462        0,
1463        0,
1464 /*39*/ MatAXPY_MPISBAIJ,
1465        MatGetSubMatrices_MPISBAIJ,
1466        MatIncreaseOverlap_MPISBAIJ,
1467        MatGetValues_MPISBAIJ,
1468        MatCopy_MPISBAIJ,
1469 /*44*/ 0,
1470        MatScale_MPISBAIJ,
1471        0,
1472        0,
1473        0,
1474 /*49*/ 0,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*54*/ 0,
1480        0,
1481        MatSetUnfactored_MPISBAIJ,
1482        0,
1483        MatSetValuesBlocked_MPISBAIJ,
1484 /*59*/ 0,
1485        0,
1486        0,
1487        0,
1488        0,
1489 /*64*/ 0,
1490        0,
1491        0,
1492        0,
1493        0,
1494 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1495        0,
1496        0,
1497        0,
1498        0,
1499 /*74*/ 0,
1500        0,
1501        0,
1502        0,
1503        0,
1504 /*79*/ 0,
1505        0,
1506        0,
1507        0,
1508        MatLoad_MPISBAIJ,
1509 /*84*/ 0,
1510        0,
1511        0,
1512        0,
1513        0,
1514 /*89*/ 0,
1515        0,
1516        0,
1517        0,
1518        0,
1519 /*94*/ 0,
1520        0,
1521        0,
1522        0,
1523        0,
1524 /*99*/ 0,
1525        0,
1526        0,
1527        0,
1528        0,
1529 /*104*/0,
1530        MatRealPart_MPISBAIJ,
1531        MatImaginaryPart_MPISBAIJ,
1532        MatGetRowUpperTriangular_MPISBAIJ,
1533        MatRestoreRowUpperTriangular_MPISBAIJ,
1534 /*109*/0,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*114*/0,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*119*/0,
1545        0,
1546        0,
1547        0
1548 };
1549 
1550 
1551 EXTERN_C_BEGIN
1552 #undef __FUNCT__
1553 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1554 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1555 {
1556   PetscFunctionBegin;
1557   *a = ((Mat_MPISBAIJ *)A->data)->A;
1558   PetscFunctionReturn(0);
1559 }
1560 EXTERN_C_END
1561 
1562 EXTERN_C_BEGIN
1563 #undef __FUNCT__
1564 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1565 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1566 {
1567   Mat_MPISBAIJ   *b;
1568   PetscErrorCode ierr;
1569   PetscInt       i,mbs,Mbs;
1570 
1571   PetscFunctionBegin;
1572   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1573   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1574   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1575   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1576 
1577   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1578   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1579   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1580   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1581   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1582 
1583   if (d_nnz) {
1584     for (i=0; i<B->rmap->n/bs; i++) {
1585       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1586     }
1587   }
1588   if (o_nnz) {
1589     for (i=0; i<B->rmap->n/bs; i++) {
1590       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1591     }
1592   }
1593 
1594   b   = (Mat_MPISBAIJ*)B->data;
1595   mbs = B->rmap->n/bs;
1596   Mbs = B->rmap->N/bs;
1597   if (mbs*bs != B->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1598 
1599   B->rmap->bs  = bs;
1600   b->bs2 = bs*bs;
1601   b->mbs = mbs;
1602   b->nbs = mbs;
1603   b->Mbs = Mbs;
1604   b->Nbs = Mbs;
1605 
1606   for (i=0; i<=b->size; i++) {
1607     b->rangebs[i] = B->rmap->range[i]/bs;
1608   }
1609   b->rstartbs = B->rmap->rstart/bs;
1610   b->rendbs   = B->rmap->rend/bs;
1611 
1612   b->cstartbs = B->cmap->rstart/bs;
1613   b->cendbs   = B->cmap->rend/bs;
1614 
1615   if (!B->preallocated) {
1616     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1617     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1618     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1619     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1620     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1621     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1622     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1623     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1624     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1625   }
1626 
1627   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1628   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1629   B->preallocated = PETSC_TRUE;
1630   PetscFunctionReturn(0);
1631 }
1632 EXTERN_C_END
1633 
1634 EXTERN_C_BEGIN
1635 #undef __FUNCT__
1636 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1637 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1638 {
1639   PetscInt       m,rstart,cstart,cend;
1640   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1641   const PetscInt *JJ=0;
1642   PetscScalar    *values=0;
1643   PetscErrorCode ierr;
1644 
1645   PetscFunctionBegin;
1646 
1647   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1648   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1649   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1650   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1651   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1652   ierr = PetscLayoutGetBlockSize(B->rmap,&bs);CHKERRQ(ierr);
1653   m      = B->rmap->n/bs;
1654   rstart = B->rmap->rstart/bs;
1655   cstart = B->cmap->rstart/bs;
1656   cend   = B->cmap->rend/bs;
1657 
1658   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1659   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1660   for (i=0; i<m; i++) {
1661     nz = ii[i+1] - ii[i];
1662     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1663     nz_max = PetscMax(nz_max,nz);
1664     JJ  = jj + ii[i];
1665     for (j=0; j<nz; j++) {
1666       if (*JJ >= cstart) break;
1667       JJ++;
1668     }
1669     d = 0;
1670     for (; j<nz; j++) {
1671       if (*JJ++ >= cend) break;
1672       d++;
1673     }
1674     d_nnz[i] = d;
1675     o_nnz[i] = nz - d;
1676   }
1677   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1678   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1679 
1680   values = (PetscScalar*)V;
1681   if (!values) {
1682     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1683     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1684   }
1685   for (i=0; i<m; i++) {
1686     PetscInt          row    = i + rstart;
1687     PetscInt          ncols  = ii[i+1] - ii[i];
1688     const PetscInt    *icols = jj + ii[i];
1689     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1690     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1691   }
1692 
1693   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1694   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1695   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1696   ierr = MatSetOption(B,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr);
1697   PetscFunctionReturn(0);
1698 }
1699 EXTERN_C_END
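
/*
   Editor's sketch (not part of this file; B, bs, and the arrays below are hypothetical):
   the block-CSR preallocation routine above is normally reached through the public
   interface MatMPISBAIJSetPreallocationCSR().  On the process owning global block rows
   0 and 1, a minimal call could look like

     PetscInt bs   = 2;
     PetscInt ii[] = {0,2,3};    two local block rows with 2 and 1 block entries
     PetscInt jj[] = {0,1,1};    global block column indices (upper triangle only)
     ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(B,bs,ii,jj,PETSC_NULL);CHKERRQ(ierr);

   Passing PETSC_NULL for the values takes the zero-filled path handled above.
*/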
1700 
1701 EXTERN_C_BEGIN
1702 #if defined(PETSC_HAVE_MUMPS)
1703 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1704 #endif
1705 #if defined(PETSC_HAVE_SPOOLES)
1706 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1707 #endif
1708 #if defined(PETSC_HAVE_PASTIX)
1709 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1710 #endif
1711 EXTERN_C_END
1712 
1713 /*MC
1714    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1715    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1716    the matrix is stored.
1717 
1718   For complex numbers this matrix is by default symmetric, NOT Hermitian symmetric. To make it Hermitian you
1719   can call MatSetOption(mat,MAT_HERMITIAN,PETSC_TRUE) after the matrix has been assembled.
1720 
1721    Options Database Keys:
1722 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1723 
1724   Level: beginner
1725 
1726 .seealso: MatCreateMPISBAIJ
1727 M*/
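
/*
   Editor's sketch (illustrative only; comm, bs, m, M, d_nz and o_nz are placeholders, not
   values defined in this file): creating a MATMPISBAIJ matrix and, for complex scalars,
   marking it Hermitian as the manual page above describes.  Note that MatSetOption_MPISBAIJ
   requires the matrix to be assembled before MAT_HERMITIAN can be set.

     Mat A;
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
        ... MatSetValues()/MatSetValuesBlocked() for the upper triangle ...
     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatSetOption(A,MAT_HERMITIAN,PETSC_TRUE);CHKERRQ(ierr);
*/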
1728 
1729 EXTERN_C_BEGIN
1730 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*);
1731 EXTERN_C_END
1732 
1733 EXTERN_C_BEGIN
1734 #undef __FUNCT__
1735 #define __FUNCT__ "MatCreate_MPISBAIJ"
1736 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1737 {
1738   Mat_MPISBAIJ   *b;
1739   PetscErrorCode ierr;
1740   PetscBool      flg;
1741 
1742   PetscFunctionBegin;
1743 
1744   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1745   B->data = (void*)b;
1746   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1747 
1748   B->ops->destroy    = MatDestroy_MPISBAIJ;
1749   B->ops->view       = MatView_MPISBAIJ;
1750   B->assembled       = PETSC_FALSE;
1751 
1752   B->insertmode = NOT_SET_VALUES;
1753   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1754   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1755 
1756   /* build local table of row and column ownerships */
1757   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1758 
1759   /* build cache for off array entries formed */
1760   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1761   b->donotstash  = PETSC_FALSE;
1762   b->colmap      = PETSC_NULL;
1763   b->garray      = PETSC_NULL;
1764   b->roworiented = PETSC_TRUE;
1765 
1766   /* stuff used in block assembly */
1767   b->barray       = 0;
1768 
1769   /* stuff used for matrix vector multiply */
1770   b->lvec         = 0;
1771   b->Mvctx        = 0;
1772   b->slvec0       = 0;
1773   b->slvec0b      = 0;
1774   b->slvec1       = 0;
1775   b->slvec1a      = 0;
1776   b->slvec1b      = 0;
1777   b->sMvctx       = 0;
1778 
1779   /* stuff for MatGetRow() */
1780   b->rowindices   = 0;
1781   b->rowvalues    = 0;
1782   b->getrowactive = PETSC_FALSE;
1783 
1784   /* hash table stuff */
1785   b->ht           = 0;
1786   b->hd           = 0;
1787   b->ht_size      = 0;
1788   b->ht_flag      = PETSC_FALSE;
1789   b->ht_fact      = 0;
1790   b->ht_total_ct  = 0;
1791   b->ht_insert_ct = 0;
1792 
1793   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1794   b->ijonly       = PETSC_FALSE;
1795 
1796   b->in_loc       = 0;
1797   b->v_loc        = 0;
1798   b->n_loc        = 0;
1799   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1800     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1801     if (flg) {
1802       PetscReal fact = 1.39;
1803       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1804       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1805       if (fact <= 1.0) fact = 1.39;
1806       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1807       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1808     }
1809   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1810 
1811 #if defined(PETSC_HAVE_PASTIX)
1812   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1813 					   "MatGetFactor_mpisbaij_pastix",
1814 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1815 #endif
1816 #if defined(PETSC_HAVE_MUMPS)
1817   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1818                                      "MatGetFactor_sbaij_mumps",
1819                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1820 #endif
1821 #if defined(PETSC_HAVE_SPOOLES)
1822   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1823                                      "MatGetFactor_mpisbaij_spooles",
1824                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1825 #endif
1826   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1827                                      "MatStoreValues_MPISBAIJ",
1828                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1829   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1830                                      "MatRetrieveValues_MPISBAIJ",
1831                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1833                                      "MatGetDiagonalBlock_MPISBAIJ",
1834                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1835   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1836                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1837                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1838   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1839                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1840                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1842                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1843                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1844 
1845   B->symmetric                  = PETSC_TRUE;
1846   B->structurally_symmetric     = PETSC_TRUE;
1847   B->symmetric_set              = PETSC_TRUE;
1848   B->structurally_symmetric_set = PETSC_TRUE;
1849   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1850   PetscFunctionReturn(0);
1851 }
1852 EXTERN_C_END
1853 
1854 /*MC
1855    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1856 
1857    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1858    and MATMPISBAIJ otherwise.
1859 
1860    Options Database Keys:
1861 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1862 
1863   Level: beginner
1864 
1865 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1866 M*/
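
/*
   Editor's sketch (illustrative only; comm and A are placeholders): selecting the generic
   type and letting the communicator size choose the implementation, as described above.

     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);    resolves to MATSEQSBAIJ or MATMPISBAIJ
*/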
1867 
1868 #undef __FUNCT__
1869 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1870 /*@C
1871    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1872    the user should preallocate the matrix storage by setting the parameters
1873    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1874    performance can be increased by more than a factor of 50.
1875 
1876    Collective on Mat
1877 
1878    Input Parameters:
1879 +  A - the matrix
1880 .  bs   - size of block
1881 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1882            submatrix  (same for all local rows)
1883 .  d_nnz - array containing the number of block nonzeros in the various block rows
1884            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1885            (possibly different for each block row) or PETSC_NULL.  If you plan to factor the matrix you must leave room
1886            for the diagonal entry and set a value even if it is zero.
1887 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1888            submatrix (same for all local rows).
1889 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1890            off-diagonal portion of the local submatrix that is to the right of the diagonal
1891            (possibly different for each block row) or PETSC_NULL.
1892 
1893 
1894    Options Database Keys:
1895 .   -mat_no_unroll - uses code that does not unroll the loops in the
1896                      block calculations (much slower)
1897 .   -mat_block_size - size of the blocks to use
1898 
1899    Notes:
1900 
1901    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1902    then it must be used on all processors that share the object for that argument.
1903 
1904    If the *_nnz parameter is given then the *_nz parameter is ignored
1905 
1906    Storage Information:
1907    For a square global matrix we define each processor's diagonal portion
1908    to be its local rows and the corresponding columns (a square submatrix);
1909    each processor's off-diagonal portion encompasses the remainder of the
1910    local matrix (a rectangular submatrix).
1911 
1912    The user can specify preallocated storage for the diagonal part of
1913    the local submatrix with either d_nz or d_nnz (not both).  Set
1914    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1915    memory allocation.  Likewise, specify preallocated storage for the
1916    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1917 
1918    You can call MatGetInfo() to get information on how effective the preallocation was;
1919    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1920    You can also run with the option -info and look for messages with the string
1921    malloc in them to see if additional memory allocation was needed.
1922 
1923    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1924    the figure below we depict these three local rows and all columns (0-11).
1925 
1926 .vb
1927            0 1 2 3 4 5 6 7 8 9 10 11
1928           -------------------
1929    row 3  |  . . . d d d o o o o o o
1930    row 4  |  . . . d d d o o o o o o
1931    row 5  |  . . . d d d o o o o o o
1932           -------------------
1933 .ve
1934 
1935    Thus, any entries in the d locations are stored in the d (diagonal)
1936    submatrix, and any entries in the o locations are stored in the
1937    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1938    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1939 
1940    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1941    plus the diagonal part of the d matrix,
1942    and o_nz should indicate the number of block nonzeros per row in the o matrix
1943 
1944    In general, for PDE problems in which most nonzeros are near the diagonal,
1945    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1946    or you will get TERRIBLE performance; see the users' manual chapter on
1947    matrices.
1948 
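   A sketch of a typical call sequence (the counts 3 and 1 below are illustrative
   placeholders; derive the actual values from your own sparsity pattern):

.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,m,M,M);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);
.ve
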
1949    Level: intermediate
1950 
1951 .keywords: matrix, block, aij, compressed row, sparse, parallel
1952 
1953 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
1954 @*/
1955 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1956 {
1957   PetscErrorCode ierr;
1958 
1959   PetscFunctionBegin;
1960   PetscValidHeaderSpecific(B,MAT_CLASSID,1);
1961   PetscValidType(B,1);
1962   PetscValidLogicalCollectiveInt(B,bs,2);
1963   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1964   PetscFunctionReturn(0);
1965 }
1966 
1967 #undef __FUNCT__
1968 #define __FUNCT__ "MatCreateSBAIJ"
1969 /*@C
1970    MatCreateSBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1971    (block compressed row).  For good matrix assembly performance
1972    the user should preallocate the matrix storage by setting the parameters
1973    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1974    performance can be increased by more than a factor of 50.
1975 
1976    Collective on MPI_Comm
1977 
1978    Input Parameters:
1979 +  comm - MPI communicator
1980 .  bs   - size of block
1981 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1982            This value should be the same as the local size used in creating the
1983            y vector for the matrix-vector product y = Ax.
1984 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1985            This value should be the same as the local size used in creating the
1986            x vector for the matrix-vector product y = Ax.
1987 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1988 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1989 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1990            submatrix  (same for all local rows)
1991 .  d_nnz - array containing the number of block nonzeros in the various block rows
1992            in the upper triangular portion of the diagonal portion of the local submatrix
1993            (possibly different for each block row) or PETSC_NULL.
1994            If you plan to factor the matrix you must leave room for the diagonal entry and
1995            set its value even if it is zero.
1996 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1997            submatrix (same for all local rows).
1998 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1999            off-diagonal portion of the local submatrix (possibly different for
2000            each block row) or PETSC_NULL.
2001 
2002    Output Parameter:
2003 .  A - the matrix
2004 
2005    Options Database Keys:
2006 .   -mat_no_unroll - uses code that does not unroll the loops in the
2007                      block calculations (much slower)
2008 .   -mat_block_size - size of the blocks to use
2009 .   -mat_mpi - use the parallel matrix data structures even on one processor
2010                (defaults to using SeqBAIJ format on one processor)
2011 
2012    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2013    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2014    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2015 
2016    Notes:
2017    The number of rows and columns must be divisible by blocksize.
2018    This matrix type does not support complex Hermitian operations.
2019 
2020    The user MUST specify either the local or global matrix dimensions
2021    (possibly both).
2022 
2023    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2024    then it must be used on all processors that share the object for that argument.
2025 
2026    If the *_nnz parameter is given then the *_nz parameter is ignored
2027 
2028    Storage Information:
2029    For a square global matrix we define each processor's diagonal portion
2030    to be its local rows and the corresponding columns (a square submatrix);
2031    each processor's off-diagonal portion encompasses the remainder of the
2032    local matrix (a rectangular submatrix).
2033 
2034    The user can specify preallocated storage for the diagonal part of
2035    the local submatrix with either d_nz or d_nnz (not both).  Set
2036    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2037    memory allocation.  Likewise, specify preallocated storage for the
2038    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2039 
2040    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2041    the figure below we depict these three local rows and all columns (0-11).
2042 
2043 .vb
2044            0 1 2 3 4 5 6 7 8 9 10 11
2045           -------------------
2046    row 3  |  . . . d d d o o o o o o
2047    row 4  |  . . . d d d o o o o o o
2048    row 5  |  . . . d d d o o o o o o
2049           -------------------
2050 .ve
2051 
2052    Thus, any entries in the d locations are stored in the d (diagonal)
2053    submatrix, and any entries in the o locations are stored in the
2054    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2055    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2056 
2057    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2058    plus the diagonal part of the d matrix,
2059    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2060    In general, for PDE problems in which most nonzeros are near the diagonal,
2061    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2062    or you will get TERRIBLE performance; see the users' manual chapter on
2063    matrices.
2064 
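   A sketch of a direct call, assuming the global size M is divisible by the block size
   (the block size 2 and the counts 3 and 1 are illustrative only):

.vb
   Mat A;
   MatCreateSBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,1,PETSC_NULL,&A);
.ve
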
2065    Level: intermediate
2066 
2067 .keywords: matrix, block, aij, compressed row, sparse, parallel
2068 
2069 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateBAIJ()
2070 @*/
2071 
2072 PetscErrorCode  MatCreateSBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2073 {
2074   PetscErrorCode ierr;
2075   PetscMPIInt    size;
2076 
2077   PetscFunctionBegin;
2078   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2079   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2080   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2081   if (size > 1) {
2082     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2083     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2084   } else {
2085     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2086     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2087   }
2088   PetscFunctionReturn(0);
2089 }
2090 
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2094 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2095 {
2096   Mat            mat;
2097   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2098   PetscErrorCode ierr;
2099   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2100   PetscScalar    *array;
2101 
2102   PetscFunctionBegin;
2103   *newmat       = 0;
2104   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2105   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2106   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2107   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2108   ierr = PetscLayoutReference(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2109   ierr = PetscLayoutReference(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2110 
2111   mat->factortype   = matin->factortype;
2112   mat->preallocated = PETSC_TRUE;
2113   mat->assembled    = PETSC_TRUE;
2114   mat->insertmode   = NOT_SET_VALUES;
2115 
2116   a = (Mat_MPISBAIJ*)mat->data;
2117   a->bs2   = oldmat->bs2;
2118   a->mbs   = oldmat->mbs;
2119   a->nbs   = oldmat->nbs;
2120   a->Mbs   = oldmat->Mbs;
2121   a->Nbs   = oldmat->Nbs;
2122 
2123 
2124   a->size         = oldmat->size;
2125   a->rank         = oldmat->rank;
2126   a->donotstash   = oldmat->donotstash;
2127   a->roworiented  = oldmat->roworiented;
2128   a->rowindices   = 0;
2129   a->rowvalues    = 0;
2130   a->getrowactive = PETSC_FALSE;
2131   a->barray       = 0;
2132   a->rstartbs    = oldmat->rstartbs;
2133   a->rendbs      = oldmat->rendbs;
2134   a->cstartbs    = oldmat->cstartbs;
2135   a->cendbs      = oldmat->cendbs;
2136 
2137   /* hash table stuff */
2138   a->ht           = 0;
2139   a->hd           = 0;
2140   a->ht_size      = 0;
2141   a->ht_flag      = oldmat->ht_flag;
2142   a->ht_fact      = oldmat->ht_fact;
2143   a->ht_total_ct  = 0;
2144   a->ht_insert_ct = 0;
2145 
2146   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2147   if (oldmat->colmap) {
2148 #if defined (PETSC_USE_CTABLE)
2149     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2150 #else
2151     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2152     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2153     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2154 #endif
2155   } else a->colmap = 0;
2156 
2157   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2158     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2159     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2160     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2161   } else a->garray = 0;
2162 
2163   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2164   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2165   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2166   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2167   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2168 
2169   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2170   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2171   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2172   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2173 
2174   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2175   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2176   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2177   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2178   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2179   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2180   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2181   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2182   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2183   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2184   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2185   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2186   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2187 
2188   /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); is not written yet, so instead take an extra reference and share the old scatter: */
2189   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2190   a->sMvctx = oldmat->sMvctx;
2191   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2192 
2193   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2194   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2195   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2196   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2197   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2198   *newmat = mat;
2199   PetscFunctionReturn(0);
2200 }
2201 
2202 #undef __FUNCT__
2203 #define __FUNCT__ "MatLoad_MPISBAIJ"
2204 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2205 {
2206   PetscErrorCode ierr;
2207   PetscInt       i,nz,j,rstart,rend;
2208   PetscScalar    *vals,*buf;
2209   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2210   MPI_Status     status;
2211   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2212   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2213   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2214   PetscInt       bs=1,Mbs,mbs,extra_rows;
2215   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2216   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2217   int            fd;
2218 
2219   PetscFunctionBegin;
2220   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2221     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2222   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2223 
2224   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2225   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2226   if (!rank) {
2227     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2228     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2229     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2230     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2231   }
2232 
2233   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2234 
2235   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2236   M = header[1]; N = header[2];
2237 
2238   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2239   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2240   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2241 
2242   /* If global sizes are set, check if they are consistent with that given in the file */
2243   if (sizesset) {
2244     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2245   }
2246   if (sizesset && M != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%D) and input matrix has (%D)",M,grows);
2247   if (sizesset && N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%D) and input matrix has (%D)",N,gcols);
2248 
2249   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2250 
2251   /*
2252      This code adds extra rows to make sure the number of rows is
2253      divisible by the blocksize
2254   */
2255   Mbs        = M/bs;
2256   extra_rows = bs - M + bs*(Mbs);
2257   if (extra_rows == bs) extra_rows = 0;
2258   else                  Mbs++;
2259   if (extra_rows && !rank) {
2260     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2261   }
2262 
2263   /* determine ownership of all rows */
2264   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2265     mbs        = Mbs/size + ((Mbs % size) > rank);
2266     m          = mbs*bs;
2267   } else { /* User Set */
2268     m          = newmat->rmap->n;
2269     mbs        = m/bs;
2270   }
2271   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2272   mmbs       = PetscMPIIntCast(mbs);
2273   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2274   rowners[0] = 0;
2275   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2276   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2277   rstart = rowners[rank];
2278   rend   = rowners[rank+1];
2279 
2280   /* distribute row lengths to all processors */
2281   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2282   if (!rank) {
2283     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2284     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2285     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2286     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2287     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2288     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2289     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2290   } else {
2291     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2292   }
2293 
2294   if (!rank) {   /* procs[0] */
2295     /* calculate the number of nonzeros on each processor */
2296     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2297     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2298     for (i=0; i<size; i++) {
2299       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2300         procsnz[i] += rowlengths[j];
2301       }
2302     }
2303     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2304 
2305     /* determine max buffer needed and allocate it */
2306     maxnz = 0;
2307     for (i=0; i<size; i++) {
2308       maxnz = PetscMax(maxnz,procsnz[i]);
2309     }
2310     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2311 
2312     /* read in my part of the matrix column indices  */
2313     nz     = procsnz[0];
2314     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2315     mycols = ibuf;
2316     if (size == 1)  nz -= extra_rows;
2317     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2318     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2319 
2320     /* read in every other processor's column indices (except the last) and ship them off */
2321     for (i=1; i<size-1; i++) {
2322       nz   = procsnz[i];
2323       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2324       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2325     }
2326     /* read in the stuff for the last proc */
2327     if (size != 1) {
2328       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2329       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2330       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2331       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2332     }
2333     ierr = PetscFree(cols);CHKERRQ(ierr);
2334   } else {  /* procs[i], i>0 */
2335     /* determine buffer space needed for message */
2336     nz = 0;
2337     for (i=0; i<m; i++) {
2338       nz += locrowlens[i];
2339     }
2340     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2341     mycols = ibuf;
2342     /* receive message of column indices*/
2343     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2344     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2345     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2346   }
2347 
2348   /* loop over local rows, determining number of off diagonal entries */
2349   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2350   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2351   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2352   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2353   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2354   rowcount = 0;
2355   nzcount  = 0;
2356   for (i=0; i<mbs; i++) {
2357     dcount  = 0;
2358     odcount = 0;
2359     for (j=0; j<bs; j++) {
2360       kmax = locrowlens[rowcount];
2361       for (k=0; k<kmax; k++) {
2362         tmp = mycols[nzcount++]/bs; /* block col. index */
2363         if (!mask[tmp]) {
2364           mask[tmp] = 1;
2365           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2366           else masked1[dcount++] = tmp; /* entry in diag portion */
2367         }
2368       }
2369       rowcount++;
2370     }
2371 
2372     dlens[i]  = dcount;  /* d_nzz[i] */
2373     odlens[i] = odcount; /* o_nzz[i] */
2374 
2375     /* zero out the mask elements we set */
2376     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2377     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2378   }
2379   if (!sizesset) {
2380     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2381   }
2382   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2383   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2384 
2385   if (!rank) {
2386     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2387     /* read in my part of the matrix numerical values  */
2388     nz = procsnz[0];
2389     vals = buf;
2390     mycols = ibuf;
2391     if (size == 1)  nz -= extra_rows;
2392     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2393     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2394 
2395     /* insert into matrix */
2396     jj      = rstart*bs;
2397     for (i=0; i<m; i++) {
2398       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2399       mycols += locrowlens[i];
2400       vals   += locrowlens[i];
2401       jj++;
2402     }
2403 
2404     /* read in other processors (except the last one) and ship out */
2405     for (i=1; i<size-1; i++) {
2406       nz   = procsnz[i];
2407       vals = buf;
2408       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2409       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2410     }
2411     /* the last proc */
2412     if (size != 1){
2413       nz   = procsnz[i] - extra_rows;
2414       vals = buf;
2415       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2416       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2417       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2418     }
2419     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2420 
2421   } else {
2422     /* receive numeric values */
2423     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2424 
2425     /* receive message of values*/
2426     vals   = buf;
2427     mycols = ibuf;
2428     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2429     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2430     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2431 
2432     /* insert into matrix */
2433     jj      = rstart*bs;
2434     for (i=0; i<m; i++) {
2435       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2436       mycols += locrowlens[i];
2437       vals   += locrowlens[i];
2438       jj++;
2439     }
2440   }
2441 
2442   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2443   ierr = PetscFree(buf);CHKERRQ(ierr);
2444   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2445   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2446   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2447   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2448   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2449   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2450   PetscFunctionReturn(0);
2451 }
2452 
2453 #undef __FUNCT__
2454 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2455 /*XXXXX@
2456    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2457 
2458    Input Parameters:
2459 .  mat  - the matrix
2460 .  fact - factor
2461 
2462    Not Collective on Mat, each process can have a different hash factor
2463 
2464    Level: advanced
2465 
2466   Notes:
2467    This can also be set by the command line option: -mat_use_hash_table fact
2468 
2469 .keywords: matrix, hashtable, factor, HT
2470 
2471 .seealso: MatSetOption()
2472 @XXXXX*/
2473 
2474 
2475 #undef __FUNCT__
2476 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2477 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2478 {
2479   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2480   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2481   PetscReal      atmp;
2482   PetscReal      *work,*svalues,*rvalues;
2483   PetscErrorCode ierr;
2484   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2485   PetscMPIInt    rank,size;
2486   PetscInt       *rowners_bs,dest,count,source;
2487   PetscScalar    *va;
2488   MatScalar      *ba;
2489   MPI_Status     stat;
2490 
2491   PetscFunctionBegin;
2492   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2493   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2494   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2495 
2496   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2497   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2498 
2499   bs   = A->rmap->bs;
2500   mbs  = a->mbs;
2501   Mbs  = a->Mbs;
2502   ba   = b->a;
2503   bi   = b->i;
2504   bj   = b->j;
2505 
2506   /* find ownerships */
2507   rowners_bs = A->rmap->range;
2508 
2509   /* each proc creates an array to be distributed */
2510   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2511   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2512 
2513   /* row_max for B */
2514   if (rank != size-1){
2515     for (i=0; i<mbs; i++) {
2516       ncols = bi[1] - bi[0]; bi++;
2517       brow  = bs*i;
2518       for (j=0; j<ncols; j++){
2519         bcol = bs*(*bj);
2520         for (kcol=0; kcol<bs; kcol++){
2521           col = bcol + kcol;                 /* local col index */
2522           col += rowners_bs[rank+1];      /* global col index */
2523           for (krow=0; krow<bs; krow++){
2524             atmp = PetscAbsScalar(*ba); ba++;
2525             row = brow + krow;    /* local row index */
2526             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2527             if (work[col] < atmp) work[col] = atmp;
2528           }
2529         }
2530         bj++;
2531       }
2532     }
2533 
2534     /* send values to its owners */
2535     for (dest=rank+1; dest<size; dest++){
2536       svalues = work + rowners_bs[dest];
2537       count   = rowners_bs[dest+1]-rowners_bs[dest];
2538       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2539     }
2540   }
2541 
2542   /* receive values */
2543   if (rank){
2544     rvalues = work;
2545     count   = rowners_bs[rank+1]-rowners_bs[rank];
2546     for (source=0; source<rank; source++){
2547       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2548       /* process values */
2549       for (i=0; i<count; i++){
2550         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2551       }
2552     }
2553   }
2554 
2555   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2556   ierr = PetscFree(work);CHKERRQ(ierr);
2557   PetscFunctionReturn(0);
2558 }
2559 
2560 #undef __FUNCT__
2561 #define __FUNCT__ "MatSOR_MPISBAIJ"
2562 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2563 {
2564   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2565   PetscErrorCode    ierr;
2566   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2567   PetscScalar       *x,*ptr,*from;
2568   Vec               bb1;
2569   const PetscScalar *b;
2570 
2571   PetscFunctionBegin;
2572   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2573   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2574 
2575   if (flag == SOR_APPLY_UPPER) {
2576     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2577     PetscFunctionReturn(0);
2578   }
2579 
2580   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2581     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2582       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2583       its--;
2584     }
2585 
2586     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2587     while (its--){
2588 
2589       /* lower triangular part: slvec0b = - B^T*xx */
2590       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2591 
2592       /* copy xx into slvec0a */
2593       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2594       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2595       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2596       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2597 
2598       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2599 
2600       /* copy bb into slvec1a */
2601       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2602       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2603       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2604       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2605 
2606       /* set slvec1b = 0 */
2607       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2608 
2609       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2610       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2611       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2612       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2613 
2614       /* upper triangular part: bb1 = bb1 - B*x */
2615       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2616 
2617       /* local diagonal sweep */
2618       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2619     }
2620     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2621   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2622     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2623   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2624     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2625   } else if (flag & SOR_EISENSTAT) {
2626     Vec               xx1;
2627     PetscBool         hasop;
2628     const PetscScalar *diag;
2629     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2630     PetscInt          i,n;
2631 
2632     if (!mat->xx1) {
2633       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2634       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2635     }
2636     xx1 = mat->xx1;
2637     bb1 = mat->bb1;
2638 
2639     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2640 
2641     if (!mat->diag) {
2642       /* this is wrong for same matrix with new nonzero values */
2643       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2644       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2645     }
2646     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2647 
2648     if (hasop) {
2649       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2650       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2651     } else {
2652       /*
2653           The following two lines are replaced by the hand-written loop below, which may be a bit faster with a good compiler:
2654       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2655       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2656       */
2657       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2658       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2659       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2660       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2661       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2662       if (omega == 1.0) {
2663         for (i=0; i<n; i++) {
2664           sl[i] = b[i] - diag[i]*x[i];
2665         }
2666         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2667       } else {
2668         for (i=0; i<n; i++) {
2669           sl[i] = b[i] + scale*diag[i]*x[i];
2670         }
2671         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2672       }
2673       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2674       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2675       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2676       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2677     }
2678 
2679     /* multiply off-diagonal portion of matrix */
2680     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2681     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2682     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2683     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2684     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2685     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2686     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2687     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2688     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2689     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2690 
2691     /* local sweep */
2692     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2693     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2694   } else {
2695     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2696   }
2697   PetscFunctionReturn(0);
2698 }
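
/*
   A minimal usage sketch (illustrative only; the names A, b and x are assumptions):
   the local symmetric sweep implemented above can be driven directly through the
   public MatSOR() interface,

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);

   which requests two global iterations with one local iteration per sweep, omega = 1.0
   and no diagonal shift.  The same code path is normally reached through PCSOR.
*/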
2699 
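/*
   MatSOR_MPISBAIJ_2comm below is an alternative implementation of the local symmetric
   sweep that uses the separate Mvctx scatter and the mat->lvec work vector instead of
   the combined sMvctx/slvec scatter used by MatSOR_MPISBAIJ above; it handles only the
   SOR_LOCAL_SYMMETRIC_SWEEP case.
*/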
2700 #undef __FUNCT__
2701 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2702 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2703 {
2704   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2705   PetscErrorCode ierr;
2706   Vec            lvec1,bb1;
2707 
2708   PetscFunctionBegin;
2709   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2710   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2711 
2712   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2713     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2714       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2715       its--;
2716     }
2717 
2718     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2719     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2720     while (its--){
2721       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2722 
2723       /* lower triangular part: bb1 = bb - B^T*xx */
2724       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2725       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2726 
2727       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2728       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2729       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2730 
2731       /* upper triangular part: bb1 = bb1 - B*xx */
2732       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2733       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2734 
2735       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2736 
2737       /* diagonal sweep */
2738       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2739     }
2740     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2741     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2742   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2743   PetscFunctionReturn(0);
2744 }
2745 
2746 #undef __FUNCT__
2747 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2748 /*@
2749      MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
2750          in standard CSR format.
2751 
2752    Collective on MPI_Comm
2753 
2754    Input Parameters:
2755 +  comm - MPI communicator
2756 .  bs - the block size, only a block size of 1 is supported
2757 .  m - number of local rows (Cannot be PETSC_DECIDE)
2758 .  n - number of local columns; this value should be the same as the local size used in creating the
2759        x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have it calculated if N is given).
2760        For square matrices n is almost always m.
2761 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2762 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2763 .   i - row indices
2764 .   j - column indices
2765 -   a - matrix values
2766 
2767    Output Parameter:
2768 .   mat - the matrix
2769 
2770    Level: intermediate
2771 
2772    Notes:
2773        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2774      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2775      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2776 
2777        The i and j indices are 0 based; the entries of i are offsets into the local j array.
2778 
2779 .keywords: matrix, aij, compressed row, sparse, parallel
2780 
2781 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2782           MPIAIJ, MatCreateAIJ(), MatCreateMPIAIJWithSplitArrays()
2783 @*/
2784 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2785 {
2786   PetscErrorCode ierr;
2787 
2788 
2789   PetscFunctionBegin;
2790   if (i[0]) {
2791     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2792   }
2793   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2794   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2795   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2796   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2797   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2798   PetscFunctionReturn(0);
2799 }
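
/*
   A minimal usage sketch (illustrative only, not taken from the PETSc examples):
   each process supplies the CSR description of its own rows.  With bs = 1, two local
   rows and only the upper triangular entries listed (as expected for the symmetric
   SBAIJ storage), the process owning global rows 0 and 1 might write

     PetscInt    ii[] = {0,2,3};           row offsets
     PetscInt    jj[] = {0,1,1};           column indices, upper triangle only
     PetscScalar aa[] = {4.0,-1.0,4.0};    matrix values
     Mat         S;

     ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,ii,jj,aa,&S);CHKERRQ(ierr);

   so that S holds the 2 x 2 symmetric matrix [4 -1; -1 4] on that process.
*/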
2800 
2801 
2802 #undef __FUNCT__
2803 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2804 /*@C
2805    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
2806    (symmetric block compressed row storage, which keeps only the upper triangular portion).
2807 
2808    Collective on MPI_Comm
2809 
2810    Input Parameters:
2811 +  B - the matrix
2812 .  bs - the block size
2813 .  i - the indices into j for the start of each local row (starts with zero)
2814 .  j - the column indices for each local row (starts with zero); these must be sorted within each row
2815 -  v - optional values in the matrix
2816 
2817    Level: developer
2818 
2819 .keywords: matrix, aij, compressed row, sparse, parallel
2820 
2821 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateAIJ(), MPIAIJ
2822 @*/
2823 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2824 {
2825   PetscErrorCode ierr;
2826 
2827   PetscFunctionBegin;
2828   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2829   PetscFunctionReturn(0);
2830 }
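
/*
   A minimal sketch of the create-then-preallocate sequence that this routine supports
   (the variables comm, m, n, bs, i, j and v are assumed to follow the CSR conventions
   documented above):

     Mat C;
     ierr = MatCreate(comm,&C);CHKERRQ(ierr);
     ierr = MatSetSizes(C,m,n,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(C,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocationCSR(C,bs,i,j,v);CHKERRQ(ierr);

   This is the same sequence used by MatCreateMPISBAIJWithArrays() above.
*/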
2831 
2832 
2833