xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision aa5a9175f561bb9a2496c6676bd5bb0429483743)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by user is ingored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some Variables required in the macro */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         n_loc = 0;
236         for (j=0; j<n; j++){
237           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
238           in_loc[n_loc] = in[j];
239           if (roworiented) {
240             v_loc[n_loc] = v[i*n+j];
241           } else {
242             v_loc[n_loc] = v[j*m+i];
243           }
244           n_loc++;
245         }
246         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
247       }
248     }
249   }
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
256 {
257   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
258   const MatScalar *value;
259   MatScalar       *barray=baij->barray;
260   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
261   PetscErrorCode  ierr;
262   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
263   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
264   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
265 
266   PetscFunctionBegin;
267   if(!barray) {
268     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
269     baij->barray = barray;
270   }
271 
272   if (roworiented) {
273     stepval = (n-1)*bs;
274   } else {
275     stepval = (m-1)*bs;
276   }
277   for (i=0; i<m; i++) {
278     if (im[i] < 0) continue;
279 #if defined(PETSC_USE_DEBUG)
280     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
281 #endif
282     if (im[i] >= rstart && im[i] < rend) {
283       row = im[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (im[i] > in[j]) {
286           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
287           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
288         }
289         /* If NumCol = 1 then a copy is not required */
290         if ((roworiented) && (n == 1)) {
291           barray = (MatScalar*) v + i*bs2;
292         } else if((!roworiented) && (m == 1)) {
293           barray = (MatScalar*) v + j*bs2;
294         } else { /* Here a copy is required */
295           if (roworiented) {
296             value = v + i*(stepval+bs)*bs + j*bs;
297           } else {
298             value = v + j*(stepval+bs)*bs + i*bs;
299           }
300           for (ii=0; ii<bs; ii++,value+=stepval) {
301             for (jj=0; jj<bs; jj++) {
302               *barray++  = *value++;
303             }
304           }
305           barray -=bs2;
306         }
307 
308         if (in[j] >= cstart && in[j] < cend){
309           col  = in[j] - cstart;
310           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
311         }
312         else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined (PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined (PETSC_USE_CTABLE)
333 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
349       if (!baij->donotstash) {
350         if (roworiented) {
351           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
352         } else {
353           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
354         }
355       }
356     }
357   }
358   PetscFunctionReturn(0);
359 }
360 
361 #undef __FUNCT__
362 #define __FUNCT__ "MatGetValues_MPISBAIJ"
363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
364 {
365   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
366   PetscErrorCode ierr;
367   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
368   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
369 
370   PetscFunctionBegin;
371   for (i=0; i<m; i++) {
372     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
373     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
374     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
375       row = idxm[i] - bsrstart;
376       for (j=0; j<n; j++) {
377         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
378         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
379         if (idxn[j] >= bscstart && idxn[j] < bscend){
380           col = idxn[j] - bscstart;
381           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
382         } else {
383           if (!baij->colmap) {
384             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
385           }
386 #if defined (PETSC_USE_CTABLE)
387           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
388           data --;
389 #else
390           data = baij->colmap[idxn[j]/bs]-1;
391 #endif
392           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
393           else {
394             col  = data + idxn[j]%bs;
395             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
396           }
397         }
398       }
399     } else {
400       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
401     }
402   }
403  PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatNorm_MPISBAIJ"
408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
409 {
410   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscReal      sum[2],*lnorm2;
413 
414   PetscFunctionBegin;
415   if (baij->size == 1) {
416     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
417   } else {
418     if (type == NORM_FROBENIUS) {
419       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
420       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
421       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
422       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
423       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
424       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
425       *norm = sqrt(sum[0] + 2*sum[1]);
426       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
427     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
428       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
429       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
430       PetscReal    *rsum,*rsum2,vabs;
431       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
432       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
433       MatScalar    *v;
434 
435       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
436       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
437       /* Amat */
438       v = amat->a; jj = amat->j;
439       for (brow=0; brow<mbs; brow++) {
440         grow = bs*(rstart + brow);
441         nz = amat->i[brow+1] - amat->i[brow];
442         for (bcol=0; bcol<nz; bcol++){
443           gcol = bs*(rstart + *jj); jj++;
444           for (col=0; col<bs; col++){
445             for (row=0; row<bs; row++){
446               vabs = PetscAbsScalar(*v); v++;
447               rsum[gcol+col] += vabs;
448               /* non-diagonal block */
449               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
450             }
451           }
452         }
453       }
454       /* Bmat */
455       v = bmat->a; jj = bmat->j;
456       for (brow=0; brow<mbs; brow++) {
457         grow = bs*(rstart + brow);
458         nz = bmat->i[brow+1] - bmat->i[brow];
459         for (bcol=0; bcol<nz; bcol++){
460           gcol = bs*garray[*jj]; jj++;
461           for (col=0; col<bs; col++){
462             for (row=0; row<bs; row++){
463               vabs = PetscAbsScalar(*v); v++;
464               rsum[gcol+col] += vabs;
465               rsum[grow+row] += vabs;
466             }
467           }
468         }
469       }
470       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
471       *norm = 0.0;
472       for (col=0; col<mat->cmap->N; col++) {
473         if (rsum2[col] > *norm) *norm = rsum2[col];
474       }
475       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
476     } else {
477       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
478     }
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
486 {
487   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
488   PetscErrorCode ierr;
489   PetscInt       nstash,reallocs;
490   InsertMode     addv;
491 
492   PetscFunctionBegin;
493   if (baij->donotstash || mat->nooffprocentries) {
494     PetscFunctionReturn(0);
495   }
496 
497   /* make sure all processors are either in INSERTMODE or ADDMODE */
498   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
499   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
500   mat->insertmode = addv; /* in case this processor had no cache */
501 
502   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
503   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
504   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
505   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
506   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
507   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
514 {
515   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
516   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
517   PetscErrorCode ierr;
518   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
519   PetscInt       *row,*col;
520   PetscBool      other_disassembled;
521   PetscMPIInt    n;
522   PetscBool      r1,r2,r3;
523   MatScalar      *val;
524   InsertMode     addv = mat->insertmode;
525 
526   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
527   PetscFunctionBegin;
528 
529   if (!baij->donotstash &&  !mat->nooffprocentries) {
530     while (1) {
531       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
532       if (!flg) break;
533 
534       for (i=0; i<n;) {
535         /* Now identify the consecutive vals belonging to the same row */
536         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
537         if (j < n) ncols = j-i;
538         else       ncols = n-i;
539         /* Now assemble all these values with a single function call */
540         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
541         i = j;
542       }
543     }
544     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
545     /* Now process the block-stash. Since the values are stashed column-oriented,
546        set the roworiented flag to column oriented, and after MatSetValues()
547        restore the original flags */
548     r1 = baij->roworiented;
549     r2 = a->roworiented;
550     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
551     baij->roworiented = PETSC_FALSE;
552     a->roworiented    = PETSC_FALSE;
553     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworinted */
554     while (1) {
555       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
556       if (!flg) break;
557 
558       for (i=0; i<n;) {
559         /* Now identify the consecutive vals belonging to the same row */
560         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
561         if (j < n) ncols = j-i;
562         else       ncols = n-i;
563         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
564         i = j;
565       }
566     }
567     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
568     baij->roworiented = r1;
569     a->roworiented    = r2;
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworinted */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled, if so we must
577      also disassemble ourselfs, in order that we may reassemble. */
578   /*
579      if nonzero structure of submatrix B cannot change then we know that
580      no processor disassembled thus we can skip this stuff
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
593   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
594   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
595 
596   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
597   baij->rowvalues = 0;
598 
599   PetscFunctionReturn(0);
600 }
601 
602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
603 #undef __FUNCT__
604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
606 {
607   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
608   PetscErrorCode    ierr;
609   PetscInt          bs = mat->rmap->bs;
610   PetscMPIInt       size = baij->size,rank = baij->rank;
611   PetscBool         iascii,isdraw;
612   PetscViewer       sviewer;
613   PetscViewerFormat format;
614 
615   PetscFunctionBegin;
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
618   if (iascii) {
619     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
620     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
621       MatInfo info;
622       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
623       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
624       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
633       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
634       PetscFunctionReturn(0);
635     } else if (format == PETSC_VIEWER_ASCII_INFO) {
636       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
637       PetscFunctionReturn(0);
638     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
639       PetscFunctionReturn(0);
640     }
641   }
642 
643   if (isdraw) {
644     PetscDraw  draw;
645     PetscBool  isnull;
646     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
647     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
648   }
649 
650   if (size == 1) {
651     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
652     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
653   } else {
654     /* assemble the entire matrix onto first processor. */
655     Mat          A;
656     Mat_SeqSBAIJ *Aloc;
657     Mat_SeqBAIJ  *Bloc;
658     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
659     MatScalar    *a;
660 
661     /* Should this be the same type as mat? */
662     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
663     if (!rank) {
664       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
665     } else {
666       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
667     }
668     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
669     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
670     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
671 
672     /* copy over the A part */
673     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
674     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
675     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
676 
677     for (i=0; i<mbs; i++) {
678       rvals[0] = bs*(baij->rstartbs + i);
679       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
680       for (j=ai[i]; j<ai[i+1]; j++) {
681         col = (baij->cstartbs+aj[j])*bs;
682         for (k=0; k<bs; k++) {
683           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
684           col++; a += bs;
685         }
686       }
687     }
688     /* copy over the B part */
689     Bloc = (Mat_SeqBAIJ*)baij->B->data;
690     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
691     for (i=0; i<mbs; i++) {
692 
693       rvals[0] = bs*(baij->rstartbs + i);
694       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
695       for (j=ai[i]; j<ai[i+1]; j++) {
696         col = baij->garray[aj[j]]*bs;
697         for (k=0; k<bs; k++) {
698           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
699           col++; a += bs;
700         }
701       }
702     }
703     ierr = PetscFree(rvals);CHKERRQ(ierr);
704     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     /*
707        Everyone has to call to draw the matrix since the graphics waits are
708        synchronized across all processors that share the PetscDraw object
709     */
710     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
713           /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII()*/
714       PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);
715       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
716     }
717     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
718     ierr = MatDestroy(&A);CHKERRQ(ierr);
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPISBAIJ"
725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
726 {
727   PetscErrorCode ierr;
728   PetscBool      iascii,isdraw,issocket,isbinary;
729 
730   PetscFunctionBegin;
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
735   if (iascii || isdraw || issocket || isbinary) {
736     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
737   } else {
738     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatDestroy_MPISBAIJ"
745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
746 {
747   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751 #if defined(PETSC_USE_LOG)
752   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
753 #endif
754   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
755   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
756   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
757   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
758 #if defined (PETSC_USE_CTABLE)
759   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
760 #else
761   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
762 #endif
763   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
764   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
765   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
771   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
772   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
773   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
774   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
776   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
778 #if defined(PETSC_USE_REAL_MAT_SINGLE)
779   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
780 #endif
781   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
782   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
784   ierr = PetscFree(mat->data);CHKERRQ(ierr);
785 
786   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
791   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatConvert_mpisbaij_mpisbstrm_C","",PETSC_NULL);CHKERRQ(ierr);
792   PetscFunctionReturn(0);
793 }
794 
795 #undef __FUNCT__
796 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
797 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
798 {
799   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
800   PetscErrorCode ierr;
801   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
802   PetscScalar    *x,*from;
803 
804   PetscFunctionBegin;
805   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
806   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
807 
808   /* diagonal part */
809   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
810   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
811 
812   /* subdiagonal part */
813   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
814 
815   /* copy x into the vec slvec0 */
816   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
817   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
818 
819   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
820   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
821   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
822 
823   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
825   /* supperdiagonal part */
826   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
827   PetscFunctionReturn(0);
828 }
829 
830 #undef __FUNCT__
831 #define __FUNCT__ "MatMult_MPISBAIJ"
832 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
833 {
834   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
835   PetscErrorCode ierr;
836   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
837   PetscScalar    *x,*from;
838 
839   PetscFunctionBegin;
840   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
841   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
842 
843   /* diagonal part */
844   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
845   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
846 
847   /* subdiagonal part */
848   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
849 
850   /* copy x into the vec slvec0 */
851   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
852   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
853 
854   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
855   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
856   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
857 
858   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
860   /* supperdiagonal part */
861   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
862   PetscFunctionReturn(0);
863 }
864 
865 #undef __FUNCT__
866 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
867 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
868 {
869   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
870   PetscErrorCode ierr;
871   PetscInt       nt;
872 
873   PetscFunctionBegin;
874   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
875   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
876 
877   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
878   if (nt != A->rmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
879 
880   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
881   /* do diagonal part */
882   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
883   /* do supperdiagonal part */
884   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
885   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
886   /* do subdiagonal part */
887   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
888   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
889   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
890 
891   PetscFunctionReturn(0);
892 }
893 
894 #undef __FUNCT__
895 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
896 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
897 {
898   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
899   PetscErrorCode ierr;
900   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
901   PetscScalar    *x,*from,zero=0.0;
902 
903   PetscFunctionBegin;
904   /*
905   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
906   PetscSynchronizedFlush(((PetscObject)A)->comm);
907   */
908   /* diagonal part */
909   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
910   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
911 
912   /* subdiagonal part */
913   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
914 
915   /* copy x into the vec slvec0 */
916   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
917   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
918   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
919   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
920 
921   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
922   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
923   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924 
925   /* supperdiagonal part */
926   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
927 
928   PetscFunctionReturn(0);
929 }
930 
931 #undef __FUNCT__
932 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
933 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
934 {
935   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
936   PetscErrorCode ierr;
937 
938   PetscFunctionBegin;
939   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
940   /* do diagonal part */
941   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
942   /* do supperdiagonal part */
943   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
944   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
945 
946   /* do subdiagonal part */
947   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
948   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
949   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
950 
951   PetscFunctionReturn(0);
952 }
953 
954 /*
955   This only works correctly for square matrices where the subblock A->A is the
956    diagonal block
957 */
958 #undef __FUNCT__
959 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
960 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
961 {
962   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
963   PetscErrorCode ierr;
964 
965   PetscFunctionBegin;
966   /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
967   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatScale_MPISBAIJ"
973 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
974 {
975   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
980   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatGetRow_MPISBAIJ"
986 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
987 {
988   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
989   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
990   PetscErrorCode ierr;
991   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
992   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
993   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
994 
995   PetscFunctionBegin;
996   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
997   mat->getrowactive = PETSC_TRUE;
998 
999   if (!mat->rowvalues && (idx || v)) {
1000     /*
1001         allocate enough space to hold information from the longest row.
1002     */
1003     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1004     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1005     PetscInt     max = 1,mbs = mat->mbs,tmp;
1006     for (i=0; i<mbs; i++) {
1007       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1008       if (max < tmp) { max = tmp; }
1009     }
1010     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1011   }
1012 
1013   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1014   lrow = row - brstart;  /* local row index */
1015 
1016   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1017   if (!v)   {pvA = 0; pvB = 0;}
1018   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1019   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1020   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1021   nztot = nzA + nzB;
1022 
1023   cmap  = mat->garray;
1024   if (v  || idx) {
1025     if (nztot) {
1026       /* Sort by increasing column numbers, assuming A and B already sorted */
1027       PetscInt imark = -1;
1028       if (v) {
1029         *v = v_p = mat->rowvalues;
1030         for (i=0; i<nzB; i++) {
1031           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1032           else break;
1033         }
1034         imark = i;
1035         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1036         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1037       }
1038       if (idx) {
1039         *idx = idx_p = mat->rowindices;
1040         if (imark > -1) {
1041           for (i=0; i<imark; i++) {
1042             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1043           }
1044         } else {
1045           for (i=0; i<nzB; i++) {
1046             if (cmap[cworkB[i]/bs] < cstart)
1047               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1048             else break;
1049           }
1050           imark = i;
1051         }
1052         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1053         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1054       }
1055     } else {
1056       if (idx) *idx = 0;
1057       if (v)   *v   = 0;
1058     }
1059   }
1060   *nz = nztot;
1061   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1062   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1063   PetscFunctionReturn(0);
1064 }
1065 
1066 #undef __FUNCT__
1067 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1068 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1069 {
1070   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1071 
1072   PetscFunctionBegin;
1073   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1074   baij->getrowactive = PETSC_FALSE;
1075   PetscFunctionReturn(0);
1076 }
1077 
1078 #undef __FUNCT__
1079 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1080 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1081 {
1082   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1083   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1084 
1085   PetscFunctionBegin;
1086   aA->getrow_utriangular = PETSC_TRUE;
1087   PetscFunctionReturn(0);
1088 }
1089 #undef __FUNCT__
1090 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1091 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1092 {
1093   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1094   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1095 
1096   PetscFunctionBegin;
1097   aA->getrow_utriangular = PETSC_FALSE;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1103 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1104 {
1105   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1110   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1111   PetscFunctionReturn(0);
1112 }
1113 
1114 #undef __FUNCT__
1115 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1116 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1117 {
1118   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1119   PetscErrorCode ierr;
1120 
1121   PetscFunctionBegin;
1122   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1123   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1129 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1130 {
1131   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1136   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1142 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1143 {
1144   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1145   Mat            A = a->A,B = a->B;
1146   PetscErrorCode ierr;
1147   PetscReal      isend[5],irecv[5];
1148 
1149   PetscFunctionBegin;
1150   info->block_size     = (PetscReal)matin->rmap->bs;
1151   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1152   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1153   isend[3] = info->memory;  isend[4] = info->mallocs;
1154   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1155   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1156   isend[3] += info->memory;  isend[4] += info->mallocs;
1157   if (flag == MAT_LOCAL) {
1158     info->nz_used      = isend[0];
1159     info->nz_allocated = isend[1];
1160     info->nz_unneeded  = isend[2];
1161     info->memory       = isend[3];
1162     info->mallocs      = isend[4];
1163   } else if (flag == MAT_GLOBAL_MAX) {
1164     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1165     info->nz_used      = irecv[0];
1166     info->nz_allocated = irecv[1];
1167     info->nz_unneeded  = irecv[2];
1168     info->memory       = irecv[3];
1169     info->mallocs      = irecv[4];
1170   } else if (flag == MAT_GLOBAL_SUM) {
1171     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1172     info->nz_used      = irecv[0];
1173     info->nz_allocated = irecv[1];
1174     info->nz_unneeded  = irecv[2];
1175     info->memory       = irecv[3];
1176     info->mallocs      = irecv[4];
1177   } else {
1178     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1179   }
1180   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1181   info->fill_ratio_needed = 0;
1182   info->factor_mallocs    = 0;
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1188 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1189 {
1190   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1191   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1192   PetscErrorCode ierr;
1193 
1194   PetscFunctionBegin;
1195   switch (op) {
1196   case MAT_NEW_NONZERO_LOCATIONS:
1197   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1198   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1199   case MAT_KEEP_NONZERO_PATTERN:
1200   case MAT_NEW_NONZERO_LOCATION_ERR:
1201     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1202     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1203     break;
1204   case MAT_ROW_ORIENTED:
1205     a->roworiented = flg;
1206     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1207     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1208     break;
1209   case MAT_NEW_DIAGONALS:
1210     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1211     break;
1212   case MAT_IGNORE_OFF_PROC_ENTRIES:
1213     a->donotstash = flg;
1214     break;
1215   case MAT_USE_HASH_TABLE:
1216     a->ht_flag = flg;
1217     break;
1218   case MAT_HERMITIAN:
1219     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1220     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1221     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1222     break;
1223   case MAT_SPD:
1224     A->spd_set                         = PETSC_TRUE;
1225     A->spd                             = flg;
1226     if (flg) {
1227       A->symmetric                     = PETSC_TRUE;
1228       A->structurally_symmetric        = PETSC_TRUE;
1229       A->symmetric_set                 = PETSC_TRUE;
1230       A->structurally_symmetric_set    = PETSC_TRUE;
1231     }
1232     break;
1233   case MAT_SYMMETRIC:
1234     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1235     break;
1236   case MAT_STRUCTURALLY_SYMMETRIC:
1237     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1238     break;
1239   case MAT_SYMMETRY_ETERNAL:
1240     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1241     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1242     break;
1243   case MAT_IGNORE_LOWER_TRIANGULAR:
1244     aA->ignore_ltriangular = flg;
1245     break;
1246   case MAT_ERROR_LOWER_TRIANGULAR:
1247     aA->ignore_ltriangular = flg;
1248     break;
1249   case MAT_GETROW_UPPERTRIANGULAR:
1250     aA->getrow_utriangular = flg;
1251     break;
1252   default:
1253     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"unknown option %d",op);
1254   }
1255   PetscFunctionReturn(0);
1256 }
1257 
1258 #undef __FUNCT__
1259 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1260 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1261 {
1262   PetscErrorCode ierr;
1263   PetscFunctionBegin;
1264   if (MAT_INITIAL_MATRIX || *B != A) {
1265     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1266   }
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1272 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1273 {
1274   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1275   Mat            a=baij->A, b=baij->B;
1276   PetscErrorCode ierr;
1277   PetscInt       nv,m,n;
1278   PetscBool      flg;
1279 
1280   PetscFunctionBegin;
1281   if (ll != rr){
1282     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1283     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1284   }
1285   if (!ll) PetscFunctionReturn(0);
1286 
1287   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1288   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1289 
1290   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1291   if (nv!=n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1292 
1293   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1294 
1295   /* left diagonalscale the off-diagonal part */
1296   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1297 
1298   /* scale the diagonal part */
1299   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1300 
1301   /* right diagonalscale the off-diagonal part */
1302   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1303   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1304   PetscFunctionReturn(0);
1305 }
1306 
1307 #undef __FUNCT__
1308 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1309 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1310 {
1311   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1312   PetscErrorCode ierr;
1313 
1314   PetscFunctionBegin;
1315   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1316   PetscFunctionReturn(0);
1317 }
1318 
1319 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatEqual_MPISBAIJ"
1323 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1324 {
1325   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1326   Mat            a,b,c,d;
1327   PetscBool      flg;
1328   PetscErrorCode ierr;
1329 
1330   PetscFunctionBegin;
1331   a = matA->A; b = matA->B;
1332   c = matB->A; d = matB->B;
1333 
1334   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1335   if (flg) {
1336     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1337   }
1338   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatCopy_MPISBAIJ"
1344 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1345 {
1346   PetscErrorCode ierr;
1347   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1348   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1349 
1350   PetscFunctionBegin;
1351   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1352   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1353     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1354     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1355     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1356   } else {
1357     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1358     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1359   }
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1365 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1366 {
1367   PetscErrorCode ierr;
1368 
1369   PetscFunctionBegin;
1370   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1376 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1377 {
1378   PetscErrorCode ierr;
1379   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1380   PetscBLASInt   bnz,one=1;
1381   Mat_SeqSBAIJ   *xa,*ya;
1382   Mat_SeqBAIJ    *xb,*yb;
1383 
1384   PetscFunctionBegin;
1385   if (str == SAME_NONZERO_PATTERN) {
1386     PetscScalar alpha = a;
1387     xa = (Mat_SeqSBAIJ *)xx->A->data;
1388     ya = (Mat_SeqSBAIJ *)yy->A->data;
1389     bnz = PetscBLASIntCast(xa->nz);
1390     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1391     xb = (Mat_SeqBAIJ *)xx->B->data;
1392     yb = (Mat_SeqBAIJ *)yy->B->data;
1393     bnz = PetscBLASIntCast(xb->nz);
1394     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1395   } else {
1396     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1397     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1398     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1399   }
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1405 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1406 {
1407   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1408   PetscInt        rbs,cbs;
1409   PetscErrorCode  ierr;
1410 
1411   PetscFunctionBegin;
1412   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1413   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1414   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1415   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1416   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,rbs);
1417   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %d with SBAIJ %d",bs,cbs);
1418   PetscFunctionReturn(0);
1419 }
1420 
1421 #undef __FUNCT__
1422 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1423 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1424 {
1425   PetscErrorCode ierr;
1426   PetscInt       i;
1427   PetscBool      flg;
1428 
1429   PetscFunctionBegin;
1430   for (i=0; i<n; i++) {
1431     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1432     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1433   }
1434   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 
1439 /* -------------------------------------------------------------------*/
1440 static struct _MatOps MatOps_Values = {
1441        MatSetValues_MPISBAIJ,
1442        MatGetRow_MPISBAIJ,
1443        MatRestoreRow_MPISBAIJ,
1444        MatMult_MPISBAIJ,
1445 /* 4*/ MatMultAdd_MPISBAIJ,
1446        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1447        MatMultAdd_MPISBAIJ,
1448        0,
1449        0,
1450        0,
1451 /*10*/ 0,
1452        0,
1453        0,
1454        MatSOR_MPISBAIJ,
1455        MatTranspose_MPISBAIJ,
1456 /*15*/ MatGetInfo_MPISBAIJ,
1457        MatEqual_MPISBAIJ,
1458        MatGetDiagonal_MPISBAIJ,
1459        MatDiagonalScale_MPISBAIJ,
1460        MatNorm_MPISBAIJ,
1461 /*20*/ MatAssemblyBegin_MPISBAIJ,
1462        MatAssemblyEnd_MPISBAIJ,
1463        MatSetOption_MPISBAIJ,
1464        MatZeroEntries_MPISBAIJ,
1465 /*24*/ 0,
1466        0,
1467        0,
1468        0,
1469        0,
1470 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1471        0,
1472        0,
1473        0,
1474        0,
1475 /*34*/ MatDuplicate_MPISBAIJ,
1476        0,
1477        0,
1478        0,
1479        0,
1480 /*39*/ MatAXPY_MPISBAIJ,
1481        MatGetSubMatrices_MPISBAIJ,
1482        MatIncreaseOverlap_MPISBAIJ,
1483        MatGetValues_MPISBAIJ,
1484        MatCopy_MPISBAIJ,
1485 /*44*/ 0,
1486        MatScale_MPISBAIJ,
1487        0,
1488        0,
1489        0,
1490 /*49*/ MatSetBlockSize_MPISBAIJ,
1491        0,
1492        0,
1493        0,
1494        0,
1495 /*54*/ 0,
1496        0,
1497        MatSetUnfactored_MPISBAIJ,
1498        0,
1499        MatSetValuesBlocked_MPISBAIJ,
1500 /*59*/ 0,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*64*/ 0,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*74*/ 0,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*79*/ 0,
1521        0,
1522        0,
1523        0,
1524        MatLoad_MPISBAIJ,
1525 /*84*/ 0,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*89*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*94*/ 0,
1536        0,
1537        0,
1538        0,
1539        0,
1540 /*99*/ 0,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*104*/0,
1546        MatRealPart_MPISBAIJ,
1547        MatImaginaryPart_MPISBAIJ,
1548        MatGetRowUpperTriangular_MPISBAIJ,
1549        MatRestoreRowUpperTriangular_MPISBAIJ,
1550 /*109*/0,
1551        0,
1552        0,
1553        0,
1554        0,
1555 /*114*/0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*119*/0,
1561        0,
1562        0,
1563        0
1564 };
1565 
1566 
1567 EXTERN_C_BEGIN
1568 #undef __FUNCT__
1569 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1570 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1571 {
1572   PetscFunctionBegin;
1573   *a = ((Mat_MPISBAIJ *)A->data)->A;
1574   PetscFunctionReturn(0);
1575 }
1576 EXTERN_C_END
1577 
1578 EXTERN_C_BEGIN
1579 #undef __FUNCT__
1580 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1581 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1582 {
1583   Mat_MPISBAIJ   *b;
1584   PetscErrorCode ierr;
1585   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1586 
1587   PetscFunctionBegin;
1588   if (bs < 0){
1589     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1590       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1591     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1592     bs   = PetscAbs(bs);
1593   }
1594   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1595   bs = newbs;
1596 
1597   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1598   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1599   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1600   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1601 
1602   B->rmap->bs = B->cmap->bs = bs;
1603   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1604   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1605 
1606   if (d_nnz) {
1607     for (i=0; i<B->rmap->n/bs; i++) {
1608       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1609     }
1610   }
1611   if (o_nnz) {
1612     for (i=0; i<B->rmap->n/bs; i++) {
1613       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1614     }
1615   }
1616 
1617   b   = (Mat_MPISBAIJ*)B->data;
1618   mbs = B->rmap->n/bs;
1619   Mbs = B->rmap->N/bs;
1620   if (mbs*bs != B->rmap->n) {
1621     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap->N,bs);
1622   }
1623 
1624   B->rmap->bs  = bs;
1625   b->bs2 = bs*bs;
1626   b->mbs = mbs;
1627   b->nbs = mbs;
1628   b->Mbs = Mbs;
1629   b->Nbs = Mbs;
1630 
1631   for (i=0; i<=b->size; i++) {
1632     b->rangebs[i] = B->rmap->range[i]/bs;
1633   }
1634   b->rstartbs = B->rmap->rstart/bs;
1635   b->rendbs   = B->rmap->rend/bs;
1636 
1637   b->cstartbs = B->cmap->rstart/bs;
1638   b->cendbs   = B->cmap->rend/bs;
1639 
1640   if (!B->preallocated) {
1641     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1642     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1643     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1644     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1645     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1646     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1647     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1648     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1649     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1650   }
1651 
1652   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1653   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1654   B->preallocated = PETSC_TRUE;
1655   PetscFunctionReturn(0);
1656 }
1657 EXTERN_C_END
1658 
1659 EXTERN_C_BEGIN
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1662 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1663 {
1664   PetscInt       m,rstart,cstart,cend;
1665   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1666   const PetscInt *JJ=0;
1667   PetscScalar    *values=0;
1668   PetscErrorCode ierr;
1669 
1670   PetscFunctionBegin;
1671 
1672   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1673   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1674   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1675   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1676   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1677   m      = B->rmap->n/bs;
1678   rstart = B->rmap->rstart/bs;
1679   cstart = B->cmap->rstart/bs;
1680   cend   = B->cmap->rend/bs;
1681 
1682   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1683   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1684   for (i=0; i<m; i++) {
1685     nz = ii[i+1] - ii[i];
1686     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1687     nz_max = PetscMax(nz_max,nz);
1688     JJ  = jj + ii[i];
1689     for (j=0; j<nz; j++) {
1690       if (*JJ >= cstart) break;
1691       JJ++;
1692     }
1693     d = 0;
1694     for (; j<nz; j++) {
1695       if (*JJ++ >= cend) break;
1696       d++;
1697     }
1698     d_nnz[i] = d;
1699     o_nnz[i] = nz - d;
1700   }
1701   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1702   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1703 
1704   values = (PetscScalar*)V;
1705   if (!values) {
1706     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1707     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1708   }
1709   for (i=0; i<m; i++) {
1710     PetscInt          row    = i + rstart;
1711     PetscInt          ncols  = ii[i+1] - ii[i];
1712     const PetscInt    *icols = jj + ii[i];
1713     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1714     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1715   }
1716 
1717   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1718   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1719   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1720 
1721   PetscFunctionReturn(0);
1722 }
1723 EXTERN_C_END
1724 
1725 EXTERN_C_BEGIN
1726 #if defined(PETSC_HAVE_MUMPS)
1727 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1728 #endif
1729 #if defined(PETSC_HAVE_SPOOLES)
1730 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1731 #endif
1732 #if defined(PETSC_HAVE_PASTIX)
1733 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1734 #endif
1735 EXTERN_C_END
1736 
1737 /*MC
1738    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1739    based on block compressed sparse row format.  Only the upper triangular portion of the "diagonal" portion of
1740    the matrix is stored.
1741 
1742   For complex numbers by default this matrix is symmetric, NOT Hermitian symmetric. To make it Hermitian symmetric you
1743   can call MatSetOption(Mat, MAT_HERMITIAN);
1744 
1745    Options Database Keys:
1746 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1747 
1748   Level: beginner
1749 
1750 .seealso: MatCreateMPISBAIJ
1751 M*/
1752 
1753 EXTERN_C_BEGIN
1754 extern PetscErrorCode MatConvert_MPISBAIJ_MPISBSTRM(Mat,const MatType,MatReuse,Mat*);
1755 EXTERN_C_END
1756 
1757 EXTERN_C_BEGIN
1758 #undef __FUNCT__
1759 #define __FUNCT__ "MatCreate_MPISBAIJ"
1760 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1761 {
1762   Mat_MPISBAIJ   *b;
1763   PetscErrorCode ierr;
1764   PetscBool      flg;
1765 
1766   PetscFunctionBegin;
1767 
1768   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1769   B->data = (void*)b;
1770   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1771 
1772   B->ops->destroy    = MatDestroy_MPISBAIJ;
1773   B->ops->view       = MatView_MPISBAIJ;
1774   B->assembled       = PETSC_FALSE;
1775 
1776   B->insertmode = NOT_SET_VALUES;
1777   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1778   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1779 
1780   /* build local table of row and column ownerships */
1781   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1782 
1783   /* build cache for off array entries formed */
1784   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1785   b->donotstash  = PETSC_FALSE;
1786   b->colmap      = PETSC_NULL;
1787   b->garray      = PETSC_NULL;
1788   b->roworiented = PETSC_TRUE;
1789 
1790   /* stuff used in block assembly */
1791   b->barray       = 0;
1792 
1793   /* stuff used for matrix vector multiply */
1794   b->lvec         = 0;
1795   b->Mvctx        = 0;
1796   b->slvec0       = 0;
1797   b->slvec0b      = 0;
1798   b->slvec1       = 0;
1799   b->slvec1a      = 0;
1800   b->slvec1b      = 0;
1801   b->sMvctx       = 0;
1802 
1803   /* stuff for MatGetRow() */
1804   b->rowindices   = 0;
1805   b->rowvalues    = 0;
1806   b->getrowactive = PETSC_FALSE;
1807 
1808   /* hash table stuff */
1809   b->ht           = 0;
1810   b->hd           = 0;
1811   b->ht_size      = 0;
1812   b->ht_flag      = PETSC_FALSE;
1813   b->ht_fact      = 0;
1814   b->ht_total_ct  = 0;
1815   b->ht_insert_ct = 0;
1816 
1817   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1818   b->ijonly       = PETSC_FALSE;
1819 
1820   b->in_loc       = 0;
1821   b->v_loc        = 0;
1822   b->n_loc        = 0;
1823   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1824     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1825     if (flg) {
1826       PetscReal fact = 1.39;
1827       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1828       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1829       if (fact <= 1.0) fact = 1.39;
1830       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1831       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1832     }
1833   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1834 
1835 #if defined(PETSC_HAVE_PASTIX)
1836   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1837 					   "MatGetFactor_mpisbaij_pastix",
1838 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1839 #endif
1840 #if defined(PETSC_HAVE_MUMPS)
1841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1842                                      "MatGetFactor_sbaij_mumps",
1843                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1844 #endif
1845 #if defined(PETSC_HAVE_SPOOLES)
1846   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1847                                      "MatGetFactor_mpisbaij_spooles",
1848                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1849 #endif
1850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1851                                      "MatStoreValues_MPISBAIJ",
1852                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1854                                      "MatRetrieveValues_MPISBAIJ",
1855                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1856   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1857                                      "MatGetDiagonalBlock_MPISBAIJ",
1858                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1859   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1860                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1861                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1862   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1863                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1864                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1865   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatConvert_mpisbaij_mpisbstrm_C",
1866                                      "MatConvert_MPISBAIJ_MPISBSTRM",
1867                                       MatConvert_MPISBAIJ_MPISBSTRM);CHKERRQ(ierr);
1868 
1869   B->symmetric                  = PETSC_TRUE;
1870   B->structurally_symmetric     = PETSC_TRUE;
1871   B->symmetric_set              = PETSC_TRUE;
1872   B->structurally_symmetric_set = PETSC_TRUE;
1873   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1874   PetscFunctionReturn(0);
1875 }
1876 EXTERN_C_END
1877 
1878 /*MC
1879    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1880 
1881    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1882    and MATMPISBAIJ otherwise.
1883 
1884    Options Database Keys:
1885 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1886 
1887   Level: beginner
1888 
1889 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1890 M*/
1891 
1892 EXTERN_C_BEGIN
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatCreate_SBAIJ"
1895 PetscErrorCode  MatCreate_SBAIJ(Mat A)
1896 {
1897   PetscErrorCode ierr;
1898   PetscMPIInt    size;
1899 
1900   PetscFunctionBegin;
1901   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1902   if (size == 1) {
1903     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1904   } else {
1905     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1906   }
1907   PetscFunctionReturn(0);
1908 }
1909 EXTERN_C_END
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1913 /*@C
1914    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1915    the user should preallocate the matrix storage by setting the parameters
1916    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1917    performance can be increased by more than a factor of 50.
1918 
1919    Collective on Mat
1920 
1921    Input Parameters:
1922 +  A - the matrix
1923 .  bs   - size of blockk
1924 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1925            submatrix  (same for all local rows)
1926 .  d_nnz - array containing the number of block nonzeros in the various block rows
1927            in the upper triangular and diagonal part of the in diagonal portion of the local
1928            (possibly different for each block row) or PETSC_NULL.  You must leave room
1929            for the diagonal entry even if it is zero.
1930 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1931            submatrix (same for all local rows).
1932 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1933            off-diagonal portion of the local submatrix (possibly different for
1934            each block row) or PETSC_NULL.
1935 
1936 
1937    Options Database Keys:
1938 .   -mat_no_unroll - uses code that does not unroll the loops in the
1939                      block calculations (much slower)
1940 .   -mat_block_size - size of the blocks to use
1941 
1942    Notes:
1943 
1944    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1945    than it must be used on all processors that share the object for that argument.
1946 
1947    If the *_nnz parameter is given then the *_nz parameter is ignored
1948 
1949    Storage Information:
1950    For a square global matrix we define each processor's diagonal portion
1951    to be its local rows and the corresponding columns (a square submatrix);
1952    each processor's off-diagonal portion encompasses the remainder of the
1953    local matrix (a rectangular submatrix).
1954 
1955    The user can specify preallocated storage for the diagonal part of
1956    the local submatrix with either d_nz or d_nnz (not both).  Set
1957    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1958    memory allocation.  Likewise, specify preallocated storage for the
1959    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1960 
1961    You can call MatGetInfo() to get information on how effective the preallocation was;
1962    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1963    You can also run with the option -info and look for messages with the string
1964    malloc in them to see if additional memory allocation was needed.
1965 
1966    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1967    the figure below we depict these three local rows and all columns (0-11).
1968 
1969 .vb
1970            0 1 2 3 4 5 6 7 8 9 10 11
1971           -------------------
1972    row 3  |  o o o d d d o o o o o o
1973    row 4  |  o o o d d d o o o o o o
1974    row 5  |  o o o d d d o o o o o o
1975           -------------------
1976 .ve
1977 
1978    Thus, any entries in the d locations are stored in the d (diagonal)
1979    submatrix, and any entries in the o locations are stored in the
1980    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1981    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1982 
1983    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1984    plus the diagonal part of the d matrix,
1985    and o_nz should indicate the number of block nonzeros per row in the upper triangular
1986    part of the o matrix.
1987    In general, for PDE problems in which most nonzeros are near the diagonal,
1988    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1989    or you will get TERRIBLE performance; see the users' manual chapter on
1990    matrices.
1991 
1992    Level: intermediate
1993 
1994 .keywords: matrix, block, aij, compressed row, sparse, parallel
1995 
1996 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1997 @*/
1998 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1999 {
2000   PetscErrorCode ierr;
2001 
2002   PetscFunctionBegin;
2003   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
2004   PetscFunctionReturn(0);
2005 }
2006 
2007 #undef __FUNCT__
2008 #define __FUNCT__ "MatCreateMPISBAIJ"
2009 /*@C
2010    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
2011    (block compressed row).  For good matrix assembly performance
2012    the user should preallocate the matrix storage by setting the parameters
2013    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2014    performance can be increased by more than a factor of 50.
2015 
2016    Collective on MPI_Comm
2017 
2018    Input Parameters:
2019 +  comm - MPI communicator
2020 .  bs   - size of blockk
2021 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
2022            This value should be the same as the local size used in creating the
2023            y vector for the matrix-vector product y = Ax.
2024 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
2025            This value should be the same as the local size used in creating the
2026            x vector for the matrix-vector product y = Ax.
2027 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2028 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2029 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2030            submatrix  (same for all local rows)
2031 .  d_nnz - array containing the number of block nonzeros in the various block rows
2032            in the upper triangular portion of the in diagonal portion of the local
2033            (possibly different for each block block row) or PETSC_NULL.
2034            You must leave room for the diagonal entry even if it is zero.
2035 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2036            submatrix (same for all local rows).
2037 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2038            off-diagonal portion of the local submatrix (possibly different for
2039            each block row) or PETSC_NULL.
2040 
2041    Output Parameter:
2042 .  A - the matrix
2043 
2044    Options Database Keys:
2045 .   -mat_no_unroll - uses code that does not unroll the loops in the
2046                      block calculations (much slower)
2047 .   -mat_block_size - size of the blocks to use
2048 .   -mat_mpi - use the parallel matrix data structures even on one processor
2049                (defaults to using SeqBAIJ format on one processor)
2050 
2051    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2052    MatXXXXSetPreallocation() paradgm instead of this routine directly.
2053    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2054 
2055    Notes:
2056    The number of rows and columns must be divisible by blocksize.
2057    This matrix type does not support complex Hermitian operation.
2058 
2059    The user MUST specify either the local or global matrix dimensions
2060    (possibly both).
2061 
2062    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
2063    than it must be used on all processors that share the object for that argument.
2064 
2065    If the *_nnz parameter is given then the *_nz parameter is ignored
2066 
2067    Storage Information:
2068    For a square global matrix we define each processor's diagonal portion
2069    to be its local rows and the corresponding columns (a square submatrix);
2070    each processor's off-diagonal portion encompasses the remainder of the
2071    local matrix (a rectangular submatrix).
2072 
2073    The user can specify preallocated storage for the diagonal part of
2074    the local submatrix with either d_nz or d_nnz (not both).  Set
2075    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2076    memory allocation.  Likewise, specify preallocated storage for the
2077    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2078 
2079    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2080    the figure below we depict these three local rows and all columns (0-11).
2081 
2082 .vb
2083            0 1 2 3 4 5 6 7 8 9 10 11
2084           -------------------
2085    row 3  |  o o o d d d o o o o o o
2086    row 4  |  o o o d d d o o o o o o
2087    row 5  |  o o o d d d o o o o o o
2088           -------------------
2089 .ve
2090 
2091    Thus, any entries in the d locations are stored in the d (diagonal)
2092    submatrix, and any entries in the o locations are stored in the
2093    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2094    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2095 
2096    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2097    plus the diagonal part of the d matrix,
2098    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2099    In general, for PDE problems in which most nonzeros are near the diagonal,
2100    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2101    or you will get TERRIBLE performance; see the users' manual chapter on
2102    matrices.
2103 
2104    Level: intermediate
2105 
2106 .keywords: matrix, block, aij, compressed row, sparse, parallel
2107 
2108 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2109 @*/
2110 
2111 PetscErrorCode  MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2112 {
2113   PetscErrorCode ierr;
2114   PetscMPIInt    size;
2115 
2116   PetscFunctionBegin;
2117   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2118   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2119   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2120   if (size > 1) {
2121     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2122     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2123   } else {
2124     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2125     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2126   }
2127   PetscFunctionReturn(0);
2128 }
2129 
2130 
2131 #undef __FUNCT__
2132 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2133 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2134 {
2135   Mat            mat;
2136   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2137   PetscErrorCode ierr;
2138   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2139   PetscScalar    *array;
2140 
2141   PetscFunctionBegin;
2142   *newmat       = 0;
2143   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2144   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2145   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2146   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2147   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2148   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2149 
2150   mat->factortype   = matin->factortype;
2151   mat->preallocated = PETSC_TRUE;
2152   mat->assembled    = PETSC_TRUE;
2153   mat->insertmode   = NOT_SET_VALUES;
2154 
2155   a = (Mat_MPISBAIJ*)mat->data;
2156   a->bs2   = oldmat->bs2;
2157   a->mbs   = oldmat->mbs;
2158   a->nbs   = oldmat->nbs;
2159   a->Mbs   = oldmat->Mbs;
2160   a->Nbs   = oldmat->Nbs;
2161 
2162 
2163   a->size         = oldmat->size;
2164   a->rank         = oldmat->rank;
2165   a->donotstash   = oldmat->donotstash;
2166   a->roworiented  = oldmat->roworiented;
2167   a->rowindices   = 0;
2168   a->rowvalues    = 0;
2169   a->getrowactive = PETSC_FALSE;
2170   a->barray       = 0;
2171   a->rstartbs    = oldmat->rstartbs;
2172   a->rendbs      = oldmat->rendbs;
2173   a->cstartbs    = oldmat->cstartbs;
2174   a->cendbs      = oldmat->cendbs;
2175 
2176   /* hash table stuff */
2177   a->ht           = 0;
2178   a->hd           = 0;
2179   a->ht_size      = 0;
2180   a->ht_flag      = oldmat->ht_flag;
2181   a->ht_fact      = oldmat->ht_fact;
2182   a->ht_total_ct  = 0;
2183   a->ht_insert_ct = 0;
2184 
2185   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2186   if (oldmat->colmap) {
2187 #if defined (PETSC_USE_CTABLE)
2188     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2189 #else
2190     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2191     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2192     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2193 #endif
2194   } else a->colmap = 0;
2195 
2196   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2197     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2198     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2199     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2200   } else a->garray = 0;
2201 
2202   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2203   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2204   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2205   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2206   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2207 
2208   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2209   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2210   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2211   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2212 
2213   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2214   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2215   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2216   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2217   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2218   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2219   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2220   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2221   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2222   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2223   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2224   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2225   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2226 
2227   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2228   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2229   a->sMvctx = oldmat->sMvctx;
2230   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2231 
2232   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2233   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2234   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2235   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2236   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2237   *newmat = mat;
2238   PetscFunctionReturn(0);
2239 }
2240 
2241 #undef __FUNCT__
2242 #define __FUNCT__ "MatLoad_MPISBAIJ"
2243 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2244 {
2245   PetscErrorCode ierr;
2246   PetscInt       i,nz,j,rstart,rend;
2247   PetscScalar    *vals,*buf;
2248   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2249   MPI_Status     status;
2250   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2251   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2252   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2253   PetscInt       bs=1,Mbs,mbs,extra_rows;
2254   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2255   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2256   int            fd;
2257 
2258   PetscFunctionBegin;
2259   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2260     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2261   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2262 
2263   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2264   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2265   if (!rank) {
2266     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2267     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2268     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2269     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2270   }
2271 
2272   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2273 
2274   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2275   M = header[1]; N = header[2];
2276 
2277   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2278   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2279   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2280 
2281   /* If global sizes are set, check if they are consistent with that given in the file */
2282   if (sizesset) {
2283     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2284   }
2285   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows:Matrix in file has (%d) and input matrix has (%d)",M,grows);
2286   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols:Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2287 
2288   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2289 
2290   /*
2291      This code adds extra rows to make sure the number of rows is
2292      divisible by the blocksize
2293   */
2294   Mbs        = M/bs;
2295   extra_rows = bs - M + bs*(Mbs);
2296   if (extra_rows == bs) extra_rows = 0;
2297   else                  Mbs++;
2298   if (extra_rows &&!rank) {
2299     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2300   }
2301 
2302   /* determine ownership of all rows */
2303   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2304     mbs        = Mbs/size + ((Mbs % size) > rank);
2305     m          = mbs*bs;
2306   } else { /* User Set */
2307     m          = newmat->rmap->n;
2308     mbs        = m/bs;
2309   }
2310   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2311   mmbs       = PetscMPIIntCast(mbs);
2312   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2313   rowners[0] = 0;
2314   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2315   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2316   rstart = rowners[rank];
2317   rend   = rowners[rank+1];
2318 
2319   /* distribute row lengths to all processors */
2320   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2321   if (!rank) {
2322     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2323     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2324     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2325     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2326     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2327     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2328     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2329   } else {
2330     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2331   }
2332 
2333   if (!rank) {   /* procs[0] */
2334     /* calculate the number of nonzeros on each processor */
2335     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2336     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2337     for (i=0; i<size; i++) {
2338       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2339         procsnz[i] += rowlengths[j];
2340       }
2341     }
2342     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2343 
2344     /* determine max buffer needed and allocate it */
2345     maxnz = 0;
2346     for (i=0; i<size; i++) {
2347       maxnz = PetscMax(maxnz,procsnz[i]);
2348     }
2349     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2350 
2351     /* read in my part of the matrix column indices  */
2352     nz     = procsnz[0];
2353     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2354     mycols = ibuf;
2355     if (size == 1)  nz -= extra_rows;
2356     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2357     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2358 
2359     /* read in every ones (except the last) and ship off */
2360     for (i=1; i<size-1; i++) {
2361       nz   = procsnz[i];
2362       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2363       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2364     }
2365     /* read in the stuff for the last proc */
2366     if (size != 1) {
2367       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2368       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2369       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2370       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2371     }
2372     ierr = PetscFree(cols);CHKERRQ(ierr);
2373   } else {  /* procs[i], i>0 */
2374     /* determine buffer space needed for message */
2375     nz = 0;
2376     for (i=0; i<m; i++) {
2377       nz += locrowlens[i];
2378     }
2379     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2380     mycols = ibuf;
2381     /* receive message of column indices*/
2382     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2383     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2384     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2385   }
2386 
2387   /* loop over local rows, determining number of off diagonal entries */
2388   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2389   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2390   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2391   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2392   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2393   rowcount = 0;
2394   nzcount  = 0;
2395   for (i=0; i<mbs; i++) {
2396     dcount  = 0;
2397     odcount = 0;
2398     for (j=0; j<bs; j++) {
2399       kmax = locrowlens[rowcount];
2400       for (k=0; k<kmax; k++) {
2401         tmp = mycols[nzcount++]/bs; /* block col. index */
2402         if (!mask[tmp]) {
2403           mask[tmp] = 1;
2404           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2405           else masked1[dcount++] = tmp; /* entry in diag portion */
2406         }
2407       }
2408       rowcount++;
2409     }
2410 
2411     dlens[i]  = dcount;  /* d_nzz[i] */
2412     odlens[i] = odcount; /* o_nzz[i] */
2413 
2414     /* zero out the mask elements we set */
2415     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2416     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2417   }
2418     if (!sizesset) {
2419     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2420   }
2421   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2422   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2423 
2424   if (!rank) {
2425     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2426     /* read in my part of the matrix numerical values  */
2427     nz = procsnz[0];
2428     vals = buf;
2429     mycols = ibuf;
2430     if (size == 1)  nz -= extra_rows;
2431     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2432     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2433 
2434     /* insert into matrix */
2435     jj      = rstart*bs;
2436     for (i=0; i<m; i++) {
2437       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2438       mycols += locrowlens[i];
2439       vals   += locrowlens[i];
2440       jj++;
2441     }
2442 
2443     /* read in other processors (except the last one) and ship out */
2444     for (i=1; i<size-1; i++) {
2445       nz   = procsnz[i];
2446       vals = buf;
2447       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2448       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2449     }
2450     /* the last proc */
2451     if (size != 1){
2452       nz   = procsnz[i] - extra_rows;
2453       vals = buf;
2454       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2455       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2456       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2457     }
2458     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2459 
2460   } else {
2461     /* receive numeric values */
2462     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2463 
2464     /* receive message of values*/
2465     vals   = buf;
2466     mycols = ibuf;
2467     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2468     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2469     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2470 
2471     /* insert into matrix */
2472     jj      = rstart*bs;
2473     for (i=0; i<m; i++) {
2474       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2475       mycols += locrowlens[i];
2476       vals   += locrowlens[i];
2477       jj++;
2478     }
2479   }
2480 
2481   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2482   ierr = PetscFree(buf);CHKERRQ(ierr);
2483   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2484   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2485   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2486   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2487   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2488   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2494 /*XXXXX@
2495    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2496 
2497    Input Parameters:
2498 .  mat  - the matrix
2499 .  fact - factor
2500 
2501    Not Collective on Mat, each process can have a different hash factor
2502 
2503    Level: advanced
2504 
2505   Notes:
2506    This can also be set by the command line option: -mat_use_hash_table fact
2507 
2508 .keywords: matrix, hashtable, factor, HT
2509 
2510 .seealso: MatSetOption()
2511 @XXXXX*/
2512 
2513 
2514 #undef __FUNCT__
2515 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2516 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2517 {
2518   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2519   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2520   PetscReal      atmp;
2521   PetscReal      *work,*svalues,*rvalues;
2522   PetscErrorCode ierr;
2523   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2524   PetscMPIInt    rank,size;
2525   PetscInt       *rowners_bs,dest,count,source;
2526   PetscScalar    *va;
2527   MatScalar      *ba;
2528   MPI_Status     stat;
2529 
2530   PetscFunctionBegin;
2531   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2532   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2533   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2534 
2535   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2536   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2537 
2538   bs   = A->rmap->bs;
2539   mbs  = a->mbs;
2540   Mbs  = a->Mbs;
2541   ba   = b->a;
2542   bi   = b->i;
2543   bj   = b->j;
2544 
2545   /* find ownerships */
2546   rowners_bs = A->rmap->range;
2547 
2548   /* each proc creates an array to be distributed */
2549   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2550   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2551 
2552   /* row_max for B */
2553   if (rank != size-1){
2554     for (i=0; i<mbs; i++) {
2555       ncols = bi[1] - bi[0]; bi++;
2556       brow  = bs*i;
2557       for (j=0; j<ncols; j++){
2558         bcol = bs*(*bj);
2559         for (kcol=0; kcol<bs; kcol++){
2560           col = bcol + kcol;                 /* local col index */
2561           col += rowners_bs[rank+1];      /* global col index */
2562           for (krow=0; krow<bs; krow++){
2563             atmp = PetscAbsScalar(*ba); ba++;
2564             row = brow + krow;    /* local row index */
2565             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2566             if (work[col] < atmp) work[col] = atmp;
2567           }
2568         }
2569         bj++;
2570       }
2571     }
2572 
2573     /* send values to its owners */
2574     for (dest=rank+1; dest<size; dest++){
2575       svalues = work + rowners_bs[dest];
2576       count   = rowners_bs[dest+1]-rowners_bs[dest];
2577       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2578     }
2579   }
2580 
2581   /* receive values */
2582   if (rank){
2583     rvalues = work;
2584     count   = rowners_bs[rank+1]-rowners_bs[rank];
2585     for (source=0; source<rank; source++){
2586       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2587       /* process values */
2588       for (i=0; i<count; i++){
2589         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2590       }
2591     }
2592   }
2593 
2594   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2595   ierr = PetscFree(work);CHKERRQ(ierr);
2596   PetscFunctionReturn(0);
2597 }
2598 
2599 #undef __FUNCT__
2600 #define __FUNCT__ "MatSOR_MPISBAIJ"
2601 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2602 {
2603   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2604   PetscErrorCode    ierr;
2605   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2606   PetscScalar       *x,*ptr,*from;
2607   Vec               bb1;
2608   const PetscScalar *b;
2609 
2610   PetscFunctionBegin;
2611   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2612   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2613 
2614   if (flag == SOR_APPLY_UPPER) {
2615     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2616     PetscFunctionReturn(0);
2617   }
2618 
2619   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2620     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2621       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2622       its--;
2623     }
2624 
2625     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2626     while (its--){
2627 
2628       /* lower triangular part: slvec0b = - B^T*xx */
2629       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2630 
2631       /* copy xx into slvec0a */
2632       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2633       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2634       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2635       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2636 
2637       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2638 
2639       /* copy bb into slvec1a */
2640       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2641       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2642       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2643       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2644 
2645       /* set slvec1b = 0 */
2646       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2647 
2648       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2649       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2650       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2651       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2652 
2653       /* upper triangular part: bb1 = bb1 - B*x */
2654       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2655 
2656       /* local diagonal sweep */
2657       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2658     }
2659     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2660   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2661     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2662   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2663     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2664   } else if (flag & SOR_EISENSTAT) {
2665     Vec               xx1;
2666     PetscBool         hasop;
2667     const PetscScalar *diag;
2668     PetscScalar       *sl,scale = (omega - 2.0)/omega;
2669     PetscInt          i,n;
2670 
2671     if (!mat->xx1) {
2672       ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
2673       ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
2674     }
2675     xx1 = mat->xx1;
2676     bb1 = mat->bb1;
2677 
2678     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);
2679 
2680     if (!mat->diag) {
2681       /* this is wrong for same matrix with new nonzero values */
2682       ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
2683       ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
2684     }
2685     ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);
2686 
2687     if (hasop) {
2688       ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
2689       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2690     } else {
2691       /*
2692           These two lines are replaced by code that may be a bit faster for a good compiler
2693       ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
2694       ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
2695       */
2696       ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2697       ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2698       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2699       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2700       ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
2701       if (omega == 1.0) {
2702 	for (i=0; i<n; i++) {
2703 	  sl[i] = b[i] - diag[i]*x[i];
2704 	}
2705         ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
2706       } else {
2707 	for (i=0; i<n; i++) {
2708 	  sl[i] = b[i] + scale*diag[i]*x[i];
2709 	}
2710         ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
2711       }
2712       ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
2713       ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
2714       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2715       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2716     }
2717 
2718     /* multiply off-diagonal portion of matrix */
2719     ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2720     ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2721     ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
2722     ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2723     ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2724     ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
2725     ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2726     ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2727     ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2728     ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);
2729 
2730     /* local sweep */
2731     ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
2732     ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
2733   } else {
2734     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2735   }
2736   PetscFunctionReturn(0);
2737 }
2738 
2739 #undef __FUNCT__
2740 #define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
2741 PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2742 {
2743   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2744   PetscErrorCode ierr;
2745   Vec            lvec1,bb1;
2746 
2747   PetscFunctionBegin;
2748   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2749   if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2750 
2751   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2752     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2753       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2754       its--;
2755     }
2756 
2757     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2758     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2759     while (its--){
2760       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2761 
2762       /* lower diagonal part: bb1 = bb - B^T*xx */
2763       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2764       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2765 
2766       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2767       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2768       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2769 
2770       /* upper diagonal part: bb1 = bb1 - B*x */
2771       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2772       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2773 
2774       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2775 
2776       /* diagonal sweep */
2777       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2778     }
2779     ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
2780     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2781   } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2782   PetscFunctionReturn(0);
2783 }
2784 
2785 #undef __FUNCT__
2786 #define __FUNCT__ "MatCreateMPISBAIJWithArrays"
2787 /*@
2788      MatCreateMPISBAIJWithArrays - creates a MPI SBAIJ matrix using arrays that contain in standard
2789          CSR format the local rows.
2790 
2791    Collective on MPI_Comm
2792 
2793    Input Parameters:
2794 +  comm - MPI communicator
2795 .  bs - the block size, only a block size of 1 is supported
2796 .  m - number of local rows (Cannot be PETSC_DECIDE)
2797 .  n - This value should be the same as the local size used in creating the
2798        x vector for the matrix-vector product y = Ax. (or PETSC_DECIDE to have
2799        calculated if N is given) For square matrices n is almost always m.
2800 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
2801 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
2802 .   i - row indices
2803 .   j - column indices
2804 -   a - matrix values
2805 
2806    Output Parameter:
2807 .   mat - the matrix
2808 
2809    Level: intermediate
2810 
2811    Notes:
2812        The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
2813      thus you CANNOT change the matrix entries by changing the values of a[] after you have
2814      called this routine. Use MatCreateMPIAIJWithSplitArrays() to avoid needing to copy the arrays.
2815 
2816        The i and j indices are 0 based, and i indices are indices corresponding to the local j array.
2817 
2818 .keywords: matrix, aij, compressed row, sparse, parallel
2819 
2820 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2821           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2822 @*/
2823 PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
2824 {
2825   PetscErrorCode ierr;
2826 
2827 
2828  PetscFunctionBegin;
2829   if (i[0]) {
2830     SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
2831   }
2832   if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
2833   ierr = MatCreate(comm,mat);CHKERRQ(ierr);
2834   ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
2835   ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
2836   ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
2837   PetscFunctionReturn(0);
2838 }
2839 
2840 
2841 #undef __FUNCT__
2842 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
2843 /*@C
2844    MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in BAIJ format
2845    (the default parallel PETSc format).
2846 
2847    Collective on MPI_Comm
2848 
2849    Input Parameters:
2850 +  A - the matrix
2851 .  bs - the block size
2852 .  i - the indices into j for the start of each local row (starts with zero)
2853 .  j - the column indices for each local row (starts with zero) these must be sorted for each row
2854 -  v - optional values in the matrix
2855 
2856    Level: developer
2857 
2858 .keywords: matrix, aij, compressed row, sparse, parallel
2859 
2860 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIBAIJSetPreallocation(), MatCreateMPIAIJ(), MPIAIJ
2861 @*/
2862 PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[], const PetscScalar v[])
2863 {
2864   PetscErrorCode ierr;
2865 
2866   PetscFunctionBegin;
2867   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
2868   PetscFunctionReturn(0);
2869 }
2870 
2871 
2872