xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 701b2c3ffa9ad67f0aad350c7a294c28d93c2ea1)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         n_loc = 0;
236         for (j=0; j<n; j++){
237           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
238           in_loc[n_loc] = in[j];
239           if (roworiented) {
240             v_loc[n_loc] = v[i*n+j];
241           } else {
242             v_loc[n_loc] = v[j*m+i];
243           }
244           n_loc++;
245         }
246         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
247       }
248     }
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
256 {
257   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
258   const MatScalar *value;
259   MatScalar       *barray=baij->barray;
260   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
261   PetscErrorCode  ierr;
262   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
263   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
264   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
265 
266   PetscFunctionBegin;
267   if(!barray) {
268     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
269     baij->barray = barray;
270   }
271 
272   if (roworiented) {
273     stepval = (n-1)*bs;
274   } else {
275     stepval = (m-1)*bs;
276   }
277   for (i=0; i<m; i++) {
278     if (im[i] < 0) continue;
279 #if defined(PETSC_USE_DEBUG)
280     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
281 #endif
282     if (im[i] >= rstart && im[i] < rend) {
283       row = im[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (im[i] > in[j]) {
286           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
287           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
288         }
289         /* If NumCol = 1 then a copy is not required */
290         if ((roworiented) && (n == 1)) {
291           barray = (MatScalar*) v + i*bs2;
292         } else if((!roworiented) && (m == 1)) {
293           barray = (MatScalar*) v + j*bs2;
294         } else { /* Here a copy is required */
295           if (roworiented) {
296             value = v + i*(stepval+bs)*bs + j*bs;
297           } else {
298             value = v + j*(stepval+bs)*bs + i*bs;
299           }
300           for (ii=0; ii<bs; ii++,value+=stepval) {
301             for (jj=0; jj<bs; jj++) {
302               *barray++  = *value++;
303             }
304           }
305           barray -=bs2;
306         }
307 
308         if (in[j] >= cstart && in[j] < cend){
309           col  = in[j] - cstart;
310           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
311         }
312         else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined (PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined (PETSC_USE_CTABLE)
333 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
349       if (!baij->donotstash) {
350         if (roworiented) {
351           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
352         } else {
353           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
354         }
355       }
356     }
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatGetValues_MPISBAIJ"
363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
364 {
365   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
366   PetscErrorCode ierr;
367   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
368   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
369 
370   PetscFunctionBegin;
371   for (i=0; i<m; i++) {
372     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
373     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
374     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
375       row = idxm[i] - bsrstart;
376       for (j=0; j<n; j++) {
377         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
378         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
379         if (idxn[j] >= bscstart && idxn[j] < bscend){
380           col = idxn[j] - bscstart;
381           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
382         } else {
383           if (!baij->colmap) {
384             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
385           }
386 #if defined (PETSC_USE_CTABLE)
387           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
388           data --;
389 #else
390           data = baij->colmap[idxn[j]/bs]-1;
391 #endif
392           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
393           else {
394             col  = data + idxn[j]%bs;
395             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
396           }
397         }
398       }
399     } else {
400       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
401     }
402   }
403  PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatNorm_MPISBAIJ"
408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
409 {
410   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscReal      sum[2],*lnorm2;
413 
414   PetscFunctionBegin;
415   if (baij->size == 1) {
416     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
417   } else {
418     if (type == NORM_FROBENIUS) {
419       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
420       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
421       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
422       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
423       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
424       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
425       *norm = sqrt(sum[0] + 2*sum[1]);
426       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
427     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
428       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
429       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
430       PetscReal    *rsum,*rsum2,vabs;
431       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
432       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
433       MatScalar    *v;
434 
435       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
436       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
437       /* Amat */
438       v = amat->a; jj = amat->j;
439       for (brow=0; brow<mbs; brow++) {
440         grow = bs*(rstart + brow);
441         nz = amat->i[brow+1] - amat->i[brow];
442         for (bcol=0; bcol<nz; bcol++){
443           gcol = bs*(rstart + *jj); jj++;
444           for (col=0; col<bs; col++){
445             for (row=0; row<bs; row++){
446               vabs = PetscAbsScalar(*v); v++;
447               rsum[gcol+col] += vabs;
448               /* non-diagonal block */
449               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
450             }
451           }
452         }
453       }
454       /* Bmat */
455       v = bmat->a; jj = bmat->j;
456       for (brow=0; brow<mbs; brow++) {
457         grow = bs*(rstart + brow);
458         nz = bmat->i[brow+1] - bmat->i[brow];
459         for (bcol=0; bcol<nz; bcol++){
460           gcol = bs*garray[*jj]; jj++;
461           for (col=0; col<bs; col++){
462             for (row=0; row<bs; row++){
463               vabs = PetscAbsScalar(*v); v++;
464               rsum[gcol+col] += vabs;
465               rsum[grow+row] += vabs;
466             }
467           }
468         }
469       }
470       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
471       *norm = 0.0;
472       for (col=0; col<mat->cmap->N; col++) {
473         if (rsum2[col] > *norm) *norm = rsum2[col];
474       }
475       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
476     } else {
477       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
478     }
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
486 {
487   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
488   PetscErrorCode ierr;
489   PetscInt       nstash,reallocs;
490   InsertMode     addv;
491 
492   PetscFunctionBegin;
493   if (baij->donotstash || mat->nooffprocentries) {
494     PetscFunctionReturn(0);
495   }
496 
497   /* make sure all processors are either in INSERTMODE or ADDMODE */
498   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
499   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
500   mat->insertmode = addv; /* in case this processor had no cache */
501 
502   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
503   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
504   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
505   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
506   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
507   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
514 {
515   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
516   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
517   PetscErrorCode ierr;
518   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
519   PetscInt       *row,*col;
520   PetscBool      other_disassembled;
521   PetscMPIInt    n;
522   PetscBool      r1,r2,r3;
523   MatScalar      *val;
524   InsertMode     addv = mat->insertmode;
525 
526   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
527   PetscFunctionBegin;
528 
529   if (!baij->donotstash &&  !mat->nooffprocentries) {
530     while (1) {
531       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
532       if (!flg) break;
533 
534       for (i=0; i<n;) {
535         /* Now identify the consecutive vals belonging to the same row */
536         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
537         if (j < n) ncols = j-i;
538         else       ncols = n-i;
539         /* Now assemble all these values with a single function call */
540         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
541         i = j;
542       }
543     }
544     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
545     /* Now process the block-stash. Since the values are stashed column-oriented,
546        set the roworiented flag to column oriented, and after MatSetValues()
547        restore the original flags */
548     r1 = baij->roworiented;
549     r2 = a->roworiented;
550     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
551     baij->roworiented = PETSC_FALSE;
552     a->roworiented    = PETSC_FALSE;
553     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
554     while (1) {
555       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
556       if (!flg) break;
557 
558       for (i=0; i<n;) {
559         /* Now identify the consecutive vals belonging to the same row */
560         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
561         if (j < n) ncols = j-i;
562         else       ncols = n-i;
563         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
564         i = j;
565       }
566     }
567     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
568     baij->roworiented = r1;
569     a->roworiented    = r2;
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled, if so we must
577      also disassemble ourselfs, in order that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
593   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
594   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
595 
596   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
597   baij->rowvalues = 0;
598 
599   PetscFunctionReturn(0);
600 }
601 
602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
603 #undef __FUNCT__
604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
606 {
607   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
608   PetscErrorCode    ierr;
609   PetscInt          bs = mat->rmap->bs;
610   PetscMPIInt       size = baij->size,rank = baij->rank;
611   PetscBool         iascii,isdraw;
612   PetscViewer       sviewer;
613   PetscViewerFormat format;
614 
615   PetscFunctionBegin;
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
618   if (iascii) {
619     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
620     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
621       MatInfo info;
622       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
623       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
624       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
625              rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
632       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
633       PetscFunctionReturn(0);
634     } else if (format == PETSC_VIEWER_ASCII_INFO) {
635       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
638       PetscFunctionReturn(0);
639     }
640   }
641 
642   if (isdraw) {
643     PetscDraw  draw;
644     PetscBool  isnull;
645     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
646     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
647   }
648 
649   if (size == 1) {
650     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
651     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
652   } else {
653     /* assemble the entire matrix onto first processor. */
654     Mat          A;
655     Mat_SeqSBAIJ *Aloc;
656     Mat_SeqBAIJ  *Bloc;
657     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
658     MatScalar    *a;
659 
660     /* Should this be the same type as mat? */
661     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
662     if (!rank) {
663       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
664     } else {
665       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
666     }
667     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
668     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
669     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
670 
671     /* copy over the A part */
672     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
673     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
674     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
675 
676     for (i=0; i<mbs; i++) {
677       rvals[0] = bs*(baij->rstartbs + i);
678       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
679       for (j=ai[i]; j<ai[i+1]; j++) {
680         col = (baij->cstartbs+aj[j])*bs;
681         for (k=0; k<bs; k++) {
682           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
683           col++; a += bs;
684         }
685       }
686     }
687     /* copy over the B part */
688     Bloc = (Mat_SeqBAIJ*)baij->B->data;
689     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
690     for (i=0; i<mbs; i++) {
691 
692       rvals[0] = bs*(baij->rstartbs + i);
693       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
694       for (j=ai[i]; j<ai[i+1]; j++) {
695         col = baij->garray[aj[j]]*bs;
696         for (k=0; k<bs; k++) {
697           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
698           col++; a += bs;
699         }
700       }
701     }
702     ierr = PetscFree(rvals);CHKERRQ(ierr);
703     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     /*
706        Everyone has to call to draw the matrix since the graphics waits are
707        synchronized across all processors that share the PetscDraw object
708     */
709     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
713       PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);
714       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
715     }
716     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
717     ierr = MatDestroy(A);CHKERRQ(ierr);
718   }
719   PetscFunctionReturn(0);
720 }
721 
722 #undef __FUNCT__
723 #define __FUNCT__ "MatView_MPISBAIJ"
724 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
725 {
726   PetscErrorCode ierr;
727   PetscBool      iascii,isdraw,issocket,isbinary;
728 
729   PetscFunctionBegin;
730   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
734   if (iascii || isdraw || issocket || isbinary) {
735     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
736   } else {
737     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
738   }
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatDestroy_MPISBAIJ"
744 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
745 {
746   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode ierr;
748 
749   PetscFunctionBegin;
750 #if defined(PETSC_USE_LOG)
751   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
752 #endif
753   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
754   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
755   if (baij->A){ierr = MatDestroy(baij->A);CHKERRQ(ierr);}
756   if (baij->B){ierr = MatDestroy(baij->B);CHKERRQ(ierr);}
757 #if defined (PETSC_USE_CTABLE)
758   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
759 #else
760   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
761 #endif
762   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
763   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
764   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
765   if (baij->slvec0) {
766     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
767     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
768   }
769   if (baij->slvec1) {
770     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
771     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
772     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
773   }
774   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
775   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
776   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
777   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
778   if (baij->diag) {ierr = VecDestroy(baij->diag);CHKERRQ(ierr);}
779   if (baij->bb1) {ierr = VecDestroy(baij->bb1);CHKERRQ(ierr);}
780   if (baij->xx1) {ierr = VecDestroy(baij->xx1);CHKERRQ(ierr);}
781 #if defined(PETSC_USE_REAL_MAT_SINGLE)
782   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
783 #endif
784   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
785   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
786   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
787   ierr = PetscFree(baij);CHKERRQ(ierr);
788 
789   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
792   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
793   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
794   PetscFunctionReturn(0);
795 }
796 
797 #undef __FUNCT__
798 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
799 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
800 {
801   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
802   PetscErrorCode ierr;
803   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
804   PetscScalar    *x,*from;
805 
806   PetscFunctionBegin;
807   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
808   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
809 
810   /* diagonal part */
811   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
812   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
813 
814   /* subdiagonal part */
815   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
816 
817   /* copy x into the vec slvec0 */
818   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
819   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
820 
821   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
822   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
823   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
824 
825   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
826   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
827   /* supperdiagonal part */
828   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
829   PetscFunctionReturn(0);
830 }
831 
832 #undef __FUNCT__
833 #define __FUNCT__ "MatMult_MPISBAIJ"
834 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
835 {
836   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
837   PetscErrorCode ierr;
838   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
839   PetscScalar    *x,*from;
840 
841   PetscFunctionBegin;
842   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
843   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
844 
845   /* diagonal part */
846   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
847   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
848 
849   /* subdiagonal part */
850   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
851 
852   /* copy x into the vec slvec0 */
853   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
854   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
855 
856   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
857   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
858   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
859 
860   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
861   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
862   /* supperdiagonal part */
863   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
864   PetscFunctionReturn(0);
865 }
866 
867 #undef __FUNCT__
868 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
869 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
870 {
871   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
872   PetscErrorCode ierr;
873   PetscInt       nt;
874 
875   PetscFunctionBegin;
876   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
877   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
878 
879   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
880   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
881 
882   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
883   /* do diagonal part */
884   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
885   /* do supperdiagonal part */
886   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
887   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
888   /* do subdiagonal part */
889   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
890   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
891   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
892 
893   PetscFunctionReturn(0);
894 }
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
898 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
899 {
900   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
901   PetscErrorCode ierr;
902   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
903   PetscScalar    *x,*from,zero=0.0;
904 
905   PetscFunctionBegin;
906   /*
907   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
908   PetscSynchronizedFlush(((PetscObject)A)->comm);
909   */
910   /* diagonal part */
911   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
912   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
913 
914   /* subdiagonal part */
915   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
916 
917   /* copy x into the vec slvec0 */
918   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
919   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
920   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
921   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
922 
923   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
925   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
926 
927   /* supperdiagonal part */
928   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
929 
930   PetscFunctionReturn(0);
931 }
932 
933 #undef __FUNCT__
934 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
935 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
936 {
937   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
938   PetscErrorCode ierr;
939 
940   PetscFunctionBegin;
941   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
942   /* do diagonal part */
943   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
944   /* do supperdiagonal part */
945   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
946   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
947 
948   /* do subdiagonal part */
949   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
950   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
951   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
952 
953   PetscFunctionReturn(0);
954 }
955 
956 /*
957   This only works correctly for square matrices where the subblock A->A is the
958    diagonal block
959 */
960 #undef __FUNCT__
961 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
962 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
963 {
964   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
965   PetscErrorCode ierr;
966 
967   PetscFunctionBegin;
968   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
969   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatScale_MPISBAIJ"
975 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
982   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatGetRow_MPISBAIJ"
988 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
989 {
990   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
991   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
992   PetscErrorCode ierr;
993   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
994   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
995   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
996 
997   PetscFunctionBegin;
998   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
999   mat->getrowactive = PETSC_TRUE;
1000 
1001   if (!mat->rowvalues && (idx || v)) {
1002     /*
1003         allocate enough space to hold information from the longest row.
1004     */
1005     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1006     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1007     PetscInt     max = 1,mbs = mat->mbs,tmp;
1008     for (i=0; i<mbs; i++) {
1009       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1010       if (max < tmp) { max = tmp; }
1011     }
1012     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1013   }
1014 
1015   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1016   lrow = row - brstart;  /* local row index */
1017 
1018   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1019   if (!v)   {pvA = 0; pvB = 0;}
1020   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1021   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1022   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1023   nztot = nzA + nzB;
1024 
1025   cmap  = mat->garray;
1026   if (v  || idx) {
1027     if (nztot) {
1028       /* Sort by increasing column numbers, assuming A and B already sorted */
1029       PetscInt imark = -1;
1030       if (v) {
1031         *v = v_p = mat->rowvalues;
1032         for (i=0; i<nzB; i++) {
1033           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1034           else break;
1035         }
1036         imark = i;
1037         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1038         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1039       }
1040       if (idx) {
1041         *idx = idx_p = mat->rowindices;
1042         if (imark > -1) {
1043           for (i=0; i<imark; i++) {
1044             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1045           }
1046         } else {
1047           for (i=0; i<nzB; i++) {
1048             if (cmap[cworkB[i]/bs] < cstart)
1049               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1050             else break;
1051           }
1052           imark = i;
1053         }
1054         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1055         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1056       }
1057     } else {
1058       if (idx) *idx = 0;
1059       if (v)   *v   = 0;
1060     }
1061   }
1062   *nz = nztot;
1063   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1064   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1076   baij->getrowactive = PETSC_FALSE;
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1082 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1083 {
1084   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1085   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1086 
1087   PetscFunctionBegin;
1088   aA->getrow_utriangular = PETSC_TRUE;
1089   PetscFunctionReturn(0);
1090 }
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1093 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1094 {
1095   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1096   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1097 
1098   PetscFunctionBegin;
1099   aA->getrow_utriangular = PETSC_FALSE;
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1105 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1106 {
1107   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1108   PetscErrorCode ierr;
1109 
1110   PetscFunctionBegin;
1111   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1112   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1118 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1119 {
1120   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1121   PetscErrorCode ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1125   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1131 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1132 {
1133   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1138   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1144 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1147   Mat            A = a->A,B = a->B;
1148   PetscErrorCode ierr;
1149   PetscReal      isend[5],irecv[5];
1150 
1151   PetscFunctionBegin;
1152   info->block_size     = (PetscReal)matin->rmap->bs;
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   } else if (flag == MAT_GLOBAL_SUM) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else {
1180     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1181   }
1182   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1183   info->fill_ratio_needed = 0;
1184   info->factor_mallocs    = 0;
1185   PetscFunctionReturn(0);
1186 }
1187 
1188 #undef __FUNCT__
1189 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1190 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1191 {
1192   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1193   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1194   PetscErrorCode ierr;
1195 
1196   PetscFunctionBegin;
1197   switch (op) {
1198   case MAT_NEW_NONZERO_LOCATIONS:
1199   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1200   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1201   case MAT_KEEP_NONZERO_PATTERN:
1202   case MAT_NEW_NONZERO_LOCATION_ERR:
1203     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1204     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1205     break;
1206   case MAT_ROW_ORIENTED:
1207     a->roworiented = flg;
1208     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1210     break;
1211   case MAT_NEW_DIAGONALS:
1212     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1213     break;
1214   case MAT_IGNORE_OFF_PROC_ENTRIES:
1215     a->donotstash = flg;
1216     break;
1217   case MAT_USE_HASH_TABLE:
1218     a->ht_flag = flg;
1219     break;
1220   case MAT_HERMITIAN:
1221     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1222     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1223     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1224     break;
1225   case MAT_SPD:
1226     A->spd_set                         = PETSC_TRUE;
1227     A->spd                             = flg;
1228     if (flg) {
1229       A->symmetric                     = PETSC_TRUE;
1230       A->structurally_symmetric        = PETSC_TRUE;
1231       A->symmetric_set                 = PETSC_TRUE;
1232       A->structurally_symmetric_set    = PETSC_TRUE;
1233     }
1234     break;
1235   case MAT_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_STRUCTURALLY_SYMMETRIC:
1239     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1240     break;
1241   case MAT_SYMMETRY_ETERNAL:
1242     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1243     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1244     break;
1245   case MAT_IGNORE_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_ERROR_LOWER_TRIANGULAR:
1249     aA->ignore_ltriangular = flg;
1250     break;
1251   case MAT_GETROW_UPPERTRIANGULAR:
1252     aA->getrow_utriangular = flg;
1253     break;
1254   default:
1255     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1256   }
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1262 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1263 {
1264   PetscErrorCode ierr;
1265   PetscFunctionBegin;
1266   if (MAT_INITIAL_MATRIX || *B != A) {
1267     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1268   }
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1274 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1275 {
1276   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1277   Mat            a=baij->A, b=baij->B;
1278   PetscErrorCode ierr;
1279   PetscInt       nv,m,n;
1280   PetscBool      flg;
1281 
1282   PetscFunctionBegin;
1283   if (ll != rr){
1284     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1285     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1286   }
1287   if (!ll) PetscFunctionReturn(0);
1288 
1289   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1290   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1291 
1292   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1293   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1294 
1295   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1296 
1297   /* left diagonalscale the off-diagonal part */
1298   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1299 
1300   /* scale the diagonal part */
1301   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1302 
1303   /* right diagonalscale the off-diagonal part */
1304   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1305   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1311 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1312 {
1313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1314   PetscErrorCode ierr;
1315 
1316   PetscFunctionBegin;
1317   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatEqual_MPISBAIJ"
1325 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1326 {
1327   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1328   Mat            a,b,c,d;
1329   PetscBool      flg;
1330   PetscErrorCode ierr;
1331 
1332   PetscFunctionBegin;
1333   a = matA->A; b = matA->B;
1334   c = matB->A; d = matB->B;
1335 
1336   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1337   if (flg) {
1338     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1339   }
1340   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 #undef __FUNCT__
1345 #define __FUNCT__ "MatCopy_MPISBAIJ"
1346 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1347 {
1348   PetscErrorCode ierr;
1349   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1350   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1351 
1352   PetscFunctionBegin;
1353   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1354   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1355     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1356     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1357     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1358   } else {
1359     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1360     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1361   }
1362   PetscFunctionReturn(0);
1363 }
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1367 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1368 {
1369   PetscErrorCode ierr;
1370 
1371   PetscFunctionBegin;
1372   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1378 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1379 {
1380   PetscErrorCode ierr;
1381   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1382   PetscBLASInt   bnz,one=1;
1383   Mat_SeqSBAIJ   *xa,*ya;
1384   Mat_SeqBAIJ    *xb,*yb;
1385 
1386   PetscFunctionBegin;
1387   if (str == SAME_NONZERO_PATTERN) {
1388     PetscScalar alpha = a;
1389     xa = (Mat_SeqSBAIJ *)xx->A->data;
1390     ya = (Mat_SeqSBAIJ *)yy->A->data;
1391     bnz = PetscBLASIntCast(xa->nz);
1392     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1393     xb = (Mat_SeqBAIJ *)xx->B->data;
1394     yb = (Mat_SeqBAIJ *)yy->B->data;
1395     bnz = PetscBLASIntCast(xb->nz);
1396     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1397   } else {
1398     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1399     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1407 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1408 {
1409   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1410   PetscInt        rbs,cbs;
1411   PetscErrorCode  ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1415   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1416   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1417   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1418   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
1419   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
1420   PetscFunctionReturn(0);
1421 }
1422 
1423 #undef __FUNCT__
1424 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1425 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1426 {
1427   PetscErrorCode ierr;
1428   PetscInt       i;
1429   PetscBool      flg;
1430 
1431   PetscFunctionBegin;
1432   for (i=0; i<n; i++) {
1433     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1434     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1435   }
1436   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 
1441 /* -------------------------------------------------------------------*/
1442 static struct _MatOps MatOps_Values = {
1443        MatSetValues_MPISBAIJ,
1444        MatGetRow_MPISBAIJ,
1445        MatRestoreRow_MPISBAIJ,
1446        MatMult_MPISBAIJ,
1447 /* 4*/ MatMultAdd_MPISBAIJ,
1448        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1449        MatMultAdd_MPISBAIJ,
1450        0,
1451        0,
1452        0,
1453 /*10*/ 0,
1454        0,
1455        0,
1456        MatSOR_MPISBAIJ,
1457        MatTranspose_MPISBAIJ,
1458 /*15*/ MatGetInfo_MPISBAIJ,
1459        MatEqual_MPISBAIJ,
1460        MatGetDiagonal_MPISBAIJ,
1461        MatDiagonalScale_MPISBAIJ,
1462        MatNorm_MPISBAIJ,
1463 /*20*/ MatAssemblyBegin_MPISBAIJ,
1464        MatAssemblyEnd_MPISBAIJ,
1465        MatSetOption_MPISBAIJ,
1466        MatZeroEntries_MPISBAIJ,
1467 /*24*/ 0,
1468        0,
1469        0,
1470        0,
1471        0,
1472 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1473        0,
1474        0,
1475        0,
1476        0,
1477 /*34*/ MatDuplicate_MPISBAIJ,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*39*/ MatAXPY_MPISBAIJ,
1483        MatGetSubMatrices_MPISBAIJ,
1484        MatIncreaseOverlap_MPISBAIJ,
1485        MatGetValues_MPISBAIJ,
1486        MatCopy_MPISBAIJ,
1487 /*44*/ 0,
1488        MatScale_MPISBAIJ,
1489        0,
1490        0,
1491        0,
1492 /*49*/ MatSetBlockSize_MPISBAIJ,
1493        0,
1494        0,
1495        0,
1496        0,
1497 /*54*/ 0,
1498        0,
1499        MatSetUnfactored_MPISBAIJ,
1500        0,
1501        MatSetValuesBlocked_MPISBAIJ,
1502 /*59*/ 0,
1503        0,
1504        0,
1505        0,
1506        0,
1507 /*64*/ 0,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1513        0,
1514        0,
1515        0,
1516        0,
1517 /*74*/ 0,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*79*/ 0,
1523        0,
1524        0,
1525        0,
1526        MatLoad_MPISBAIJ,
1527 /*84*/ 0,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*89*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*94*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*99*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*104*/0,
1548        MatRealPart_MPISBAIJ,
1549        MatImaginaryPart_MPISBAIJ,
1550        MatGetRowUpperTriangular_MPISBAIJ,
1551        MatRestoreRowUpperTriangular_MPISBAIJ,
1552 /*109*/0,
1553        0,
1554        0,
1555        0,
1556        0,
1557 /*114*/0,
1558        0,
1559        0,
1560        0,
1561        0,
1562 /*119*/0,
1563        0,
1564        0,
1565        0
1566 };
1567 
1568 
1569 EXTERN_C_BEGIN
1570 #undef __FUNCT__
1571 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1572 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscBool  *iscopy,MatReuse reuse,Mat *a)
1573 {
1574   PetscFunctionBegin;
1575   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1576   *iscopy = PETSC_FALSE;
1577   PetscFunctionReturn(0);
1578 }
1579 EXTERN_C_END
1580 
1581 EXTERN_C_BEGIN
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1584 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1585 {
1586   Mat_MPISBAIJ   *b;
1587   PetscErrorCode ierr;
1588   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1589 
1590   PetscFunctionBegin;
1591   if (bs < 0){
1592     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1593       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1594     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1595     bs   = PetscAbs(bs);
1596   }
1597   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1598   bs = newbs;
1599 
1600   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1601   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1602   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1603   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1604 
1605   B->rmap->bs = B->cmap->bs = bs;
1606   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1607   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1608 
1609   if (d_nnz) {
1610     for (i=0; i<B->rmap->n/bs; i++) {
1611       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1612     }
1613   }
1614   if (o_nnz) {
1615     for (i=0; i<B->rmap->n/bs; i++) {
1616       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1617     }
1618   }
1619 
1620   b   = (Mat_MPISBAIJ*)B->data;
1621   mbs = B->rmap->n/bs;
1622   Mbs = B->rmap->N/bs;
1623   if (mbs*bs != B->rmap->n) {
1624     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1625   }
1626 
1627   B->rmap->bs  = bs;
1628   b->bs2 = bs*bs;
1629   b->mbs = mbs;
1630   b->nbs = mbs;
1631   b->Mbs = Mbs;
1632   b->Nbs = Mbs;
1633 
1634   for (i=0; i<=b->size; i++) {
1635     b->rangebs[i] = B->rmap->range[i]/bs;
1636   }
1637   b->rstartbs = B->rmap->rstart/bs;
1638   b->rendbs   = B->rmap->rend/bs;
1639 
1640   b->cstartbs = B->cmap->rstart/bs;
1641   b->cendbs   = B->cmap->rend/bs;
1642 
1643   if (!B->preallocated) {
1644     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1645     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1646     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1647     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1648     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1649     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1650     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1651     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1652     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1653   }
1654 
1655   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1656   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1657   B->preallocated = PETSC_TRUE;
1658   PetscFunctionReturn(0);
1659 }
1660 EXTERN_C_END
1661 
1662 EXTERN_C_BEGIN
1663 #undef __FUNCT__
1664 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1665 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1666 {
1667   PetscInt       m,rstart,cstart,cend;
1668   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1669   const PetscInt *JJ=0;
1670   PetscScalar    *values=0;
1671   PetscErrorCode ierr;
1672 
1673   PetscFunctionBegin;
1674 
1675   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1676   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1677   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1678   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1679   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1680   m      = B->rmap->n/bs;
1681   rstart = B->rmap->rstart/bs;
1682   cstart = B->cmap->rstart/bs;
1683   cend   = B->cmap->rend/bs;
1684 
1685   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1686   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1687   for (i=0; i<m; i++) {
1688     nz = ii[i+1] - ii[i];
1689     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1690     nz_max = PetscMax(nz_max,nz);
1691     JJ  = jj + ii[i];
1692     for (j=0; j<nz; j++) {
1693       if (*JJ >= cstart) break;
1694       JJ++;
1695     }
1696     d = 0;
1697     for (; j<nz; j++) {
1698       if (*JJ++ >= cend) break;
1699       d++;
1700     }
1701     d_nnz[i] = d;
1702     o_nnz[i] = nz - d;
1703   }
1704   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1705   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1706 
1707   values = (PetscScalar*)V;
1708   if (!values) {
1709     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1710     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1711   }
1712   for (i=0; i<m; i++) {
1713     PetscInt          row    = i + rstart;
1714     PetscInt          ncols  = ii[i+1] - ii[i];
1715     const PetscInt    *icols = jj + ii[i];
1716     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1717     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1718   }
1719 
1720   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1721   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1722   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1723 
1724   PetscFunctionReturn(0);
1725 }
1726 EXTERN_C_END
1727 
1728 EXTERN_C_BEGIN
1729 #if defined(PETSC_HAVE_MUMPS)
1730 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1731 #endif
1732 #if defined(PETSC_HAVE_SPOOLES)
1733 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1734 #endif
1735 #if defined(PETSC_HAVE_PASTIX)
1736 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1737 #endif
1738 EXTERN_C_END
1739 
1740 /*MC
1741    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1742    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1743 
1744    Options Database Keys:
1745 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1746 
1747   Level: beginner
1748 
1749 .seealso: MatCreateMPISBAIJ
1750 M*/
1751 
1752 EXTERN_C_BEGIN
1753 #undef __FUNCT__
1754 #define __FUNCT__ "MatCreate_MPISBAIJ"
1755 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1756 {
1757   Mat_MPISBAIJ   *b;
1758   PetscErrorCode ierr;
1759   PetscBool      flg;
1760 
1761   PetscFunctionBegin;
1762 
1763   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1764   B->data = (void*)b;
1765   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1766 
1767   B->ops->destroy    = MatDestroy_MPISBAIJ;
1768   B->ops->view       = MatView_MPISBAIJ;
1769   B->assembled       = PETSC_FALSE;
1770 
1771   B->insertmode = NOT_SET_VALUES;
1772   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1773   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1774 
1775   /* build local table of row and column ownerships */
1776   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1777 
1778   /* build cache for off array entries formed */
1779   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1780   b->donotstash  = PETSC_FALSE;
1781   b->colmap      = PETSC_NULL;
1782   b->garray      = PETSC_NULL;
1783   b->roworiented = PETSC_TRUE;
1784 
1785   /* stuff used in block assembly */
1786   b->barray       = 0;
1787 
1788   /* stuff used for matrix vector multiply */
1789   b->lvec         = 0;
1790   b->Mvctx        = 0;
1791   b->slvec0       = 0;
1792   b->slvec0b      = 0;
1793   b->slvec1       = 0;
1794   b->slvec1a      = 0;
1795   b->slvec1b      = 0;
1796   b->sMvctx       = 0;
1797 
1798   /* stuff for MatGetRow() */
1799   b->rowindices   = 0;
1800   b->rowvalues    = 0;
1801   b->getrowactive = PETSC_FALSE;
1802 
1803   /* hash table stuff */
1804   b->ht           = 0;
1805   b->hd           = 0;
1806   b->ht_size      = 0;
1807   b->ht_flag      = PETSC_FALSE;
1808   b->ht_fact      = 0;
1809   b->ht_total_ct  = 0;
1810   b->ht_insert_ct = 0;
1811 
1812   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1813   b->ijonly       = PETSC_FALSE;
1814 
1815   b->in_loc       = 0;
1816   b->v_loc        = 0;
1817   b->n_loc        = 0;
1818   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1819     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1820     if (flg) {
1821       PetscReal fact = 1.39;
1822       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1823       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1824       if (fact <= 1.0) fact = 1.39;
1825       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1826       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1827     }
1828   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1829 
1830 #if defined(PETSC_HAVE_PASTIX)
1831   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1832 					   "MatGetFactor_mpisbaij_pastix",
1833 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1834 #endif
1835 #if defined(PETSC_HAVE_MUMPS)
1836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1837                                      "MatGetFactor_sbaij_mumps",
1838                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1839 #endif
1840 #if defined(PETSC_HAVE_SPOOLES)
1841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1842                                      "MatGetFactor_mpisbaij_spooles",
1843                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1844 #endif
1845   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1846                                      "MatStoreValues_MPISBAIJ",
1847                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1848   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1849                                      "MatRetrieveValues_MPISBAIJ",
1850                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1851   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1852                                      "MatGetDiagonalBlock_MPISBAIJ",
1853                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1854   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1855                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1856                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1857   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1858                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1859                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1860   B->symmetric                  = PETSC_TRUE;
1861   B->structurally_symmetric     = PETSC_TRUE;
1862   B->symmetric_set              = PETSC_TRUE;
1863   B->structurally_symmetric_set = PETSC_TRUE;
1864   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1865   PetscFunctionReturn(0);
1866 }
1867 EXTERN_C_END
1868 
1869 /*MC
1870    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1871 
1872    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1873    and MATMPISBAIJ otherwise.
1874 
1875    Options Database Keys:
1876 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1877 
1878   Level: beginner
1879 
1880 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1881 M*/
1882 
1883 EXTERN_C_BEGIN
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatCreate_SBAIJ"
1886 PetscErrorCode  MatCreate_SBAIJ(Mat A)
1887 {
1888   PetscErrorCode ierr;
1889   PetscMPIInt    size;
1890 
1891   PetscFunctionBegin;
1892   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1893   if (size == 1) {
1894     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1895   } else {
1896     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 EXTERN_C_END
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1904 /*@C
1905    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1906    the user should preallocate the matrix storage by setting the parameters
1907    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1908    performance can be increased by more than a factor of 50.
1909 
1910    Collective on Mat
1911 
1912    Input Parameters:
1913 +  A - the matrix
1914 .  bs   - size of blockk
1915 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1916            submatrix  (same for all local rows)
1917 .  d_nnz - array containing the number of block nonzeros in the various block rows
1918            in the upper triangular and diagonal part of the in diagonal portion of the local
1919            (possibly different for each block row) or PETSC_NULL.  You must leave room
1920            for the diagonal entry even if it is zero.
1921 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1922            submatrix (same for all local rows).
1923 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1924            off-diagonal portion of the local submatrix (possibly different for
1925            each block row) or PETSC_NULL.
1926 
1927 
1928    Options Database Keys:
1929 .   -mat_no_unroll - uses code that does not unroll the loops in the
1930                      block calculations (much slower)
1931 .   -mat_block_size - size of the blocks to use
1932 
1933    Notes:
1934 
1935    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1936    than it must be used on all processors that share the object for that argument.
1937 
1938    If the *_nnz parameter is given then the *_nz parameter is ignored
1939 
1940    Storage Information:
1941    For a square global matrix we define each processor's diagonal portion
1942    to be its local rows and the corresponding columns (a square submatrix);
1943    each processor's off-diagonal portion encompasses the remainder of the
1944    local matrix (a rectangular submatrix).
1945 
1946    The user can specify preallocated storage for the diagonal part of
1947    the local submatrix with either d_nz or d_nnz (not both).  Set
1948    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1949    memory allocation.  Likewise, specify preallocated storage for the
1950    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1951 
1952    You can call MatGetInfo() to get information on how effective the preallocation was;
1953    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1954    You can also run with the option -info and look for messages with the string
1955    malloc in them to see if additional memory allocation was needed.
1956 
1957    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1958    the figure below we depict these three local rows and all columns (0-11).
1959 
1960 .vb
1961            0 1 2 3 4 5 6 7 8 9 10 11
1962           -------------------
1963    row 3  |  o o o d d d o o o o o o
1964    row 4  |  o o o d d d o o o o o o
1965    row 5  |  o o o d d d o o o o o o
1966           -------------------
1967 .ve
1968 
1969    Thus, any entries in the d locations are stored in the d (diagonal)
1970    submatrix, and any entries in the o locations are stored in the
1971    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1972    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1973 
1974    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1975    plus the diagonal part of the d matrix,
1976    and o_nz should indicate the number of block nonzeros per row in the upper triangular
1977    part of the o matrix.
1978    In general, for PDE problems in which most nonzeros are near the diagonal,
1979    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1980    or you will get TERRIBLE performance; see the users' manual chapter on
1981    matrices.
1982 
1983    Level: intermediate
1984 
1985 .keywords: matrix, block, aij, compressed row, sparse, parallel
1986 
1987 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1988 @*/
1989 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1990 {
1991   PetscErrorCode ierr;
1992 
1993   PetscFunctionBegin;
1994   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatCreateMPISBAIJ"
2000 /*@C
2001    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2002    (block compressed row).  For good matrix assembly performance
2003    the user should preallocate the matrix storage by setting the parameters
2004    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2005    performance can be increased by more than a factor of 50.
2006 
2007    Collective on MPI_Comm
2008 
2009    Input Parameters:
2010 +  comm - MPI communicator
2011 .  bs   - size of blockk
2012 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2013            This value should be the same as the local size used in creating the
2014            y vector for the matrix-vector product y = Ax.
2015 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2016            This value should be the same as the local size used in creating the
2017            x vector for the matrix-vector product y = Ax.
2018 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2019 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2020 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2021            submatrix  (same for all local rows)
2022 .  d_nnz - array containing the number of block nonzeros in the various block rows
2023            in the upper triangular portion of the in diagonal portion of the local
2024            (possibly different for each block block row) or PETSC_NULL.
2025            You must leave room for the diagonal entry even if it is zero.
2026 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2027            submatrix (same for all local rows).
2028 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2029            off-diagonal portion of the local submatrix (possibly different for
2030            each block row) or PETSC_NULL.
2031 
2032    Output Parameter:
2033 .  A - the matrix
2034 
2035    Options Database Keys:
2036 .   -mat_no_unroll - uses code that does not unroll the loops in the
2037                      block calculations (much slower)
2038 .   -mat_block_size - size of the blocks to use
2039 .   -mat_mpi - use the parallel matrix data structures even on one processor
2040                (defaults to using SeqBAIJ format on one processor)
2041 
2042    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2043    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2044    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2045 
2046    Notes:
2047    The number of rows and columns must be divisible by blocksize.
2048    This matrix type does not support complex Hermitian operation.
2049 
2050    The user MUST specify either the local or global matrix dimensions
2051    (possibly both).
2052 
2053    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2054    than it must be used on all processors that share the object for that argument.
2055 
2056    If the *_nnz parameter is given then the *_nz parameter is ignored
2057 
2058    Storage Information:
2059    For a square global matrix we define each processor's diagonal portion
2060    to be its local rows and the corresponding columns (a square submatrix);
2061    each processor's off-diagonal portion encompasses the remainder of the
2062    local matrix (a rectangular submatrix).
2063 
2064    The user can specify preallocated storage for the diagonal part of
2065    the local submatrix with either d_nz or d_nnz (not both).  Set
2066    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2067    memory allocation.  Likewise, specify preallocated storage for the
2068    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2069 
2070    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2071    the figure below we depict these three local rows and all columns (0-11).
2072 
2073 .vb
2074            0 1 2 3 4 5 6 7 8 9 10 11
2075           -------------------
2076    row 3  |  o o o d d d o o o o o o
2077    row 4  |  o o o d d d o o o o o o
2078    row 5  |  o o o d d d o o o o o o
2079           -------------------
2080 .ve
2081 
2082    Thus, any entries in the d locations are stored in the d (diagonal)
2083    submatrix, and any entries in the o locations are stored in the
2084    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2085    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2086 
2087    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2088    plus the diagonal part of the d matrix,
2089    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2090    In general, for PDE problems in which most nonzeros are near the diagonal,
2091    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2092    or you will get TERRIBLE performance; see the users' manual chapter on
2093    matrices.
2094 
2095    Level: intermediate
2096 
2097 .keywords: matrix, block, aij, compressed row, sparse, parallel
2098 
2099 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2100 @*/
2101 
2102 PetscErrorCode  MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2103 {
2104   PetscErrorCode ierr;
2105   PetscMPIInt    size;
2106 
2107   PetscFunctionBegin;
2108   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2109   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2110   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2111   if (size > 1) {
2112     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2113     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2114   } else {
2115     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2116     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2117   }
2118   PetscFunctionReturn(0);
2119 }
2120 
2121 
2122 #undef __FUNCT__
2123 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2124 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2125 {
2126   Mat            mat;
2127   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2128   PetscErrorCode ierr;
2129   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2130   PetscScalar    *array;
2131 
2132   PetscFunctionBegin;
2133   *newmat       = 0;
2134   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2135   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2136   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2137   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2138   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2139   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2140 
2141   mat->factortype   = matin->factortype;
2142   mat->preallocated = PETSC_TRUE;
2143   mat->assembled    = PETSC_TRUE;
2144   mat->insertmode   = NOT_SET_VALUES;
2145 
2146   a = (Mat_MPISBAIJ*)mat->data;
2147   a->bs2   = oldmat->bs2;
2148   a->mbs   = oldmat->mbs;
2149   a->nbs   = oldmat->nbs;
2150   a->Mbs   = oldmat->Mbs;
2151   a->Nbs   = oldmat->Nbs;
2152 
2153 
2154   a->size         = oldmat->size;
2155   a->rank         = oldmat->rank;
2156   a->donotstash   = oldmat->donotstash;
2157   a->roworiented  = oldmat->roworiented;
2158   a->rowindices   = 0;
2159   a->rowvalues    = 0;
2160   a->getrowactive = PETSC_FALSE;
2161   a->barray       = 0;
2162   a->rstartbs    = oldmat->rstartbs;
2163   a->rendbs      = oldmat->rendbs;
2164   a->cstartbs    = oldmat->cstartbs;
2165   a->cendbs      = oldmat->cendbs;
2166 
2167   /* hash table stuff */
2168   a->ht           = 0;
2169   a->hd           = 0;
2170   a->ht_size      = 0;
2171   a->ht_flag      = oldmat->ht_flag;
2172   a->ht_fact      = oldmat->ht_fact;
2173   a->ht_total_ct  = 0;
2174   a->ht_insert_ct = 0;
2175 
2176   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2177   if (oldmat->colmap) {
2178 #if defined (PETSC_USE_CTABLE)
2179     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2180 #else
2181     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2182     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2183     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2184 #endif
2185   } else a->colmap = 0;
2186 
2187   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2188     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2189     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2190     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2191   } else a->garray = 0;
2192 
2193   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2194   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2195   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2196   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2197   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2198 
2199   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2200   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2201   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2202   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2203 
2204   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2205   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2206   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2207   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2208   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2209   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2210   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2211   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2212   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2213   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2214   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2215   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2216   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2217 
2218   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2219   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2220   a->sMvctx = oldmat->sMvctx;
2221   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2222 
2223   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2224   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2225   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2226   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2227   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2228   *newmat = mat;
2229   PetscFunctionReturn(0);
2230 }
2231 
2232 #undef __FUNCT__
2233 #define __FUNCT__ "MatLoad_MPISBAIJ"
2234 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2235 {
2236   PetscErrorCode ierr;
2237   PetscInt       i,nz,j,rstart,rend;
2238   PetscScalar    *vals,*buf;
2239   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2240   MPI_Status     status;
2241   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2242   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2243   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2244   PetscInt       bs=1,Mbs,mbs,extra_rows;
2245   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2246   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2247   int            fd;
2248 
2249   PetscFunctionBegin;
2250   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2251     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2252   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2253 
2254   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2255   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2256   if (!rank) {
2257     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2258     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2259     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2260     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2261   }
2262 
2263   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2264 
2265   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2266   M = header[1]; N = header[2];
2267 
2268   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2269   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2270   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2271 
2272   /* If global sizes are set, check if they are consistent with that given in the file */
2273   if (sizesset) {
2274     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2275   }
2276   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2277   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2278 
2279   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2280 
2281   /*
2282      This code adds extra rows to make sure the number of rows is
2283      divisible by the blocksize
2284   */
2285   Mbs        = M/bs;
2286   extra_rows = bs - M + bs*(Mbs);
2287   if (extra_rows == bs) extra_rows = 0;
2288   else                  Mbs++;
2289   if (extra_rows &&!rank) {
2290     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2291   }
2292 
2293   /* determine ownership of all rows */
2294   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2295     mbs        = Mbs/size + ((Mbs % size) > rank);
2296     m          = mbs*bs;
2297   } else { /* User Set */
2298     m          = newmat->rmap->n;
2299     mbs        = m/bs;
2300   }
2301   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2302   mmbs       = PetscMPIIntCast(mbs);
2303   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2304   rowners[0] = 0;
2305   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2306   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2307   rstart = rowners[rank];
2308   rend   = rowners[rank+1];
2309 
2310   /* distribute row lengths to all processors */
2311   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2312   if (!rank) {
2313     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2314     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2315     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2316     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2317     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2318     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2319     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2320   } else {
2321     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2322   }
2323 
2324   if (!rank) {   /* procs[0] */
2325     /* calculate the number of nonzeros on each processor */
2326     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2327     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2328     for (i=0; i<size; i++) {
2329       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2330         procsnz[i] += rowlengths[j];
2331       }
2332     }
2333     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2334 
2335     /* determine max buffer needed and allocate it */
2336     maxnz = 0;
2337     for (i=0; i<size; i++) {
2338       maxnz = PetscMax(maxnz,procsnz[i]);
2339     }
2340     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2341 
2342     /* read in my part of the matrix column indices  */
2343     nz     = procsnz[0];
2344     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2345     mycols = ibuf;
2346     if (size == 1)  nz -= extra_rows;
2347     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2348     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2349 
2350     /* read in every ones (except the last) and ship off */
2351     for (i=1; i<size-1; i++) {
2352       nz   = procsnz[i];
2353       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2354       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2355     }
2356     /* read in the stuff for the last proc */
2357     if (size != 1) {
2358       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2359       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2360       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2361       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2362     }
2363     ierr = PetscFree(cols);CHKERRQ(ierr);
2364   } else {  /* procs[i], i>0 */
2365     /* determine buffer space needed for message */
2366     nz = 0;
2367     for (i=0; i<m; i++) {
2368       nz += locrowlens[i];
2369     }
2370     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2371     mycols = ibuf;
2372     /* receive message of column indices*/
2373     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2374     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2375     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2376   }
2377 
2378   /* loop over local rows, determining number of off diagonal entries */
2379   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2380   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2381   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2382   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2383   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2384   rowcount = 0;
2385   nzcount  = 0;
2386   for (i=0; i<mbs; i++) {
2387     dcount  = 0;
2388     odcount = 0;
2389     for (j=0; j<bs; j++) {
2390       kmax = locrowlens[rowcount];
2391       for (k=0; k<kmax; k++) {
2392         tmp = mycols[nzcount++]/bs; /* block col. index */
2393         if (!mask[tmp]) {
2394           mask[tmp] = 1;
2395           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2396           else masked1[dcount++] = tmp; /* entry in diag portion */
2397         }
2398       }
2399       rowcount++;
2400     }
2401 
2402     dlens[i]  = dcount;  /* d_nzz[i] */
2403     odlens[i] = odcount; /* o_nzz[i] */
2404 
2405     /* zero out the mask elements we set */
2406     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2407     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2408   }
2409     if (!sizesset) {
2410     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2411   }
2412   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2413   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2414 
2415   if (!rank) {
2416     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2417     /* read in my part of the matrix numerical values  */
2418     nz = procsnz[0];
2419     vals = buf;
2420     mycols = ibuf;
2421     if (size == 1)  nz -= extra_rows;
2422     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2423     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2424 
2425     /* insert into matrix */
2426     jj      = rstart*bs;
2427     for (i=0; i<m; i++) {
2428       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2429       mycols += locrowlens[i];
2430       vals   += locrowlens[i];
2431       jj++;
2432     }
2433 
2434     /* read in other processors (except the last one) and ship out */
2435     for (i=1; i<size-1; i++) {
2436       nz   = procsnz[i];
2437       vals = buf;
2438       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2439       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2440     }
2441     /* the last proc */
2442     if (size != 1){
2443       nz   = procsnz[i] - extra_rows;
2444       vals = buf;
2445       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2446       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2447       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2448     }
2449     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2450 
2451   } else {
2452     /* receive numeric values */
2453     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2454 
2455     /* receive message of values*/
2456     vals   = buf;
2457     mycols = ibuf;
2458     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2459     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2460     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2461 
2462     /* insert into matrix */
2463     jj      = rstart*bs;
2464     for (i=0; i<m; i++) {
2465       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2466       mycols += locrowlens[i];
2467       vals   += locrowlens[i];
2468       jj++;
2469     }
2470   }
2471 
2472   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2473   ierr = PetscFree(buf);CHKERRQ(ierr);
2474   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2475   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2476   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2477   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2478   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2479   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2480   PetscFunctionReturn(0);
2481 }
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2485 /*XXXXX@
2486    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2487 
2488    Input Parameters:
2489 .  mat  - the matrix
2490 .  fact - factor
2491 
2492    Not Collective on Mat, each process can have a different hash factor
2493 
2494    Level: advanced
2495 
2496   Notes:
2497    This can also be set by the command line option: -mat_use_hash_table fact
2498 
2499 .keywords: matrix, hashtable, factor, HT
2500 
2501 .seealso: MatSetOption()
2502 @XXXXX*/
2503 
2504 
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2507 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2508 {
2509   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2510   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2511   PetscReal      atmp;
2512   PetscReal      *work,*svalues,*rvalues;
2513   PetscErrorCode ierr;
2514   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2515   PetscMPIInt    rank,size;
2516   PetscInt       *rowners_bs,dest,count,source;
2517   PetscScalar    *va;
2518   MatScalar      *ba;
2519   MPI_Status     stat;
2520 
2521   PetscFunctionBegin;
2522   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2523   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2524   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2525 
2526   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2527   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2528 
2529   bs   = A->rmap->bs;
2530   mbs  = a->mbs;
2531   Mbs  = a->Mbs;
2532   ba   = b->a;
2533   bi   = b->i;
2534   bj   = b->j;
2535 
2536   /* find ownerships */
2537   rowners_bs = A->rmap->range;
2538 
2539   /* each proc creates an array to be distributed */
2540   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2541   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2542 
2543   /* row_max for B */
2544   if (rank != size-1){
2545     for (i=0; i<mbs; i++) {
2546       ncols = bi[1] - bi[0]; bi++;
2547       brow  = bs*i;
2548       for (j=0; j<ncols; j++){
2549         bcol = bs*(*bj);
2550         for (kcol=0; kcol<bs; kcol++){
2551           col = bcol + kcol;                 /* local col index */
2552           col += rowners_bs[rank+1];      /* global col index */
2553           for (krow=0; krow<bs; krow++){
2554             atmp = PetscAbsScalar(*ba); ba++;
2555             row = brow + krow;    /* local row index */
2556             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2557             if (work[col] < atmp) work[col] = atmp;
2558           }
2559         }
2560         bj++;
2561       }
2562     }
2563 
2564     /* send values to its owners */
2565     for (dest=rank+1; dest<size; dest++){
2566       svalues = work + rowners_bs[dest];
2567       count   = rowners_bs[dest+1]-rowners_bs[dest];
2568       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2569     }
2570   }
2571 
2572   /* receive values */
2573   if (rank){
2574     rvalues = work;
2575     count   = rowners_bs[rank+1]-rowners_bs[rank];
2576     for (source=0; source<rank; source++){
2577       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2578       /* process values */
2579       for (i=0; i<count; i++){
2580         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2581       }
2582     }
2583   }
2584 
2585   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2586   ierr = PetscFree(work);CHKERRQ(ierr);
2587   PetscFunctionReturn(0);
2588 }
2589 
2590 #undef __FUNCT__
2591 #define __FUNCT__ "MatSOR_MPISBAIJ"
2592 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2593 {
2594   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2595   PetscErrorCode    ierr;
2596   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2597   PetscScalar       *x,*ptr,*from;
2598   Vec               bb1;
2599   const PetscScalar *b;
2600 
2601   PetscFunctionBegin;
2602   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2603   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2604 
2605   if (flag == SOR_APPLY_UPPER) {
2606     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2607     PetscFunctionReturn(0);
2608   }
2609 
2610   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2611     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2612       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2613       its--;
2614     }
2615 
2616     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2617     while (its--){
2618 
2619       /* lower triangular part: slvec0b = - B^T*xx */
2620       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2621 
2622       /* copy xx into slvec0a */
2623       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2624       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2625       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2626       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2627 
2628       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2629 
2630       /* copy bb into slvec1a */
2631       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2632       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2633       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2634       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2635 
2636       /* set slvec1b = 0 */
2637       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2638 
2639       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2640       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2641       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2642       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2643 
2644       /* upper triangular part: bb1 = bb1 - B*x */
2645       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2646 
2647       /* local diagonal sweep */
2648       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2649     }
2650     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2651   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2652     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2653   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2654     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2655   } else if (flag & SOR_EISENSTAT) {
2656     Vec               xx1;
2657     PetscBool         hasop;
2658     const PetscScalar *diag;
2659     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2660     PetscInt          i,n;
2661 
2662     if (!mat->xx1) {
2663       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2664       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2665     }
2666     xx1 = mat->xx1;
2667     bb1 = mat->bb1;
2668 
2669     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2670 
2671     if (!mat->diag) {
2672       /* this is wrong for same matrix with new nonzero values */
2673       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2674       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2675     }
2676     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2677 
2678     if (hasop) {
2679       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2680       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2681     } else {
2682       /*
2683           These two lines are replaced by code that may be a bit faster for a good compiler
2684       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2685       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2686       */
2687       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2688       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2689       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2690       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2691       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2692       if (omega == 1.0) {
2693 	for (i=0; i<n; i++) {
2694 	  sl[i] = b[i] - diag[i]*x[i];
2695 	}
2696         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2697       } else {
2698 	for (i=0; i<n; i++) {
2699 	  sl[i] = b[i] + scale*diag[i]*x[i];
2700 	}
2701         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2702       }
2703       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2704       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2705       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2706       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2707     }
2708 
2709     /* multiply off-diagonal portion of matrix */
2710     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2711     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2712     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2713     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2714     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2715     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2716     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2717     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2718     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2719     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2720 
2721     /* local sweep */
2722     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2723     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2724   } else {
2725     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2726   }
2727   PetscFunctionReturn(0);
2728 }
2729 
2730 #undef __FUNCT__
2731 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2732 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2733 {
2734   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2735   PetscErrorCode ierr;
2736   Vec            lvec1,bb1;
2737 
2738   PetscFunctionBegin;
2739   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2740   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2741 
2742   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2743     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2744       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2745       its--;
2746     }
2747 
2748     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2749     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2750     while (its--){
2751       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2752 
2753       /* lower diagonal part: bb1 = bb - B^T*xx */
2754       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2755       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2756 
2757       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2758       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2759       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2760 
2761       /* upper diagonal part: bb1 = bb1 - B*x */
2762       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2763       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2764 
2765       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2766 
2767       /* diagonal sweep */
2768       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2769     }
2770     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2771     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2772   } else {
2773     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2774   }
2775   PetscFunctionReturn(0);
2776 }
2777 
2778 #undef __FUNCT__
2779 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2780 /*@
2781      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2782          CSR format the local rows.
2783 
2784    Collective on MPI_Comm
2785 
2786    Input Parameters:
2787 +  comm - MPI communicator
2788 .  bs - the block size, only a block size of 1 is supported
2789 .  m - number of local rows (Cannot be PETSC_DECIDE)
2790 .  n - This value should be the same as the local size used in creating the
2791        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2792        calculated if N is given) For square matrices n is almost always m.
2793 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2794 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2795 .   i - row indices
2796 .   j - column indices
2797 -   a - matrix values
2798 
2799    Output Parameter:
2800 .   mat - the matrix
2801 
2802    Level: intermediate
2803 
2804    Notes:
2805        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2806      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2807      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2808 
2809        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2810 
2811 .keywords: matrix, aij, compressed row, sparse, parallel
2812 
2813 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2814           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2815 @*/
2816 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2817 {
2818   PetscErrorCode ierr;
2819 
2820 
2821  PetscFunctionBegin;
2822   if (i[0]) {
2823     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2824   }
2825   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2826   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2827   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2828   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2829   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2830   PetscFunctionReturn(0);
2831 }
2832 
2833 
2834 #undef __FUNCT__
2835 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2836 /*@C
2837    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2838    (the default parallel PETSc format).
2839 
2840    Collective on MPI_Comm
2841 
2842    Input Parameters:
2843 +  A - the matrix
2844 .  bs - the block size
2845 .  i - the indices into j for the start of each local row (starts with zero)
2846 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2847 -  v - optional values in the matrix
2848 
2849    Level: developer
2850 
2851 .keywords: matrix, aij, compressed row, sparse, parallel
2852 
2853 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2854 @*/
2855 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2856 {
2857   PetscErrorCode ierr;
2858 
2859   PetscFunctionBegin;
2860   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2861   PetscFunctionReturn(0);
2862 }
2863 
2864 
2865