xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision da0e1c470cd0cbbbdc182335ab9ba482b734ddca)
1 
2 #include <../src/mat/impls/baij/mpi/mpibaij.h>    /*I "petscmat.h" I*/
3 #include <../src/mat/impls/sbaij/mpi/mpisbaij.h>
4 #include <../src/mat/impls/sbaij/seq/sbaij.h>
5 #include <petscblaslapack.h>
6 
7 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 extern PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 extern PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 extern PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 extern PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 extern PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 extern PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 extern PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 extern PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 extern PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 extern PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*,Vec,Vec);
19 extern PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *,Vec,Vec);
20 extern PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 extern PetscErrorCode MatSOR_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode  MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode  MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
55 { \
56  \
57     brow = row/bs;  \
58     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
59     rmax = aimax[brow]; nrow = ailen[brow]; \
60       bcol = col/bs; \
61       ridx = row % bs; cidx = col % bs; \
62       low = 0; high = nrow; \
63       while (high-low > 3) { \
64         t = (low+high)/2; \
65         if (rp[t] > bcol) high = t; \
66         else              low  = t; \
67       } \
68       for (_i=low; _i<high; _i++) { \
69         if (rp[_i] > bcol) break; \
70         if (rp[_i] == bcol) { \
71           bap  = ap +  bs2*_i + bs*cidx + ridx; \
72           if (addv == ADD_VALUES) *bap += value;  \
73           else                    *bap  = value;  \
74           goto a_noinsert; \
75         } \
76       } \
77       if (a->nonew == 1) goto a_noinsert; \
78       if (a->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
79       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
80       N = nrow++ - 1;  \
81       /* shift up all the later entries in this row */ \
82       for (ii=N; ii>=_i; ii--) { \
83         rp[ii+1] = rp[ii]; \
84         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
85       } \
86       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
87       rp[_i]                      = bcol;  \
88       ap[bs2*_i + bs*cidx + ridx] = value;  \
89       a_noinsert:; \
90     ailen[brow] = nrow; \
91 }
92 
93 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
94 { \
95     brow = row/bs;  \
96     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
97     rmax = bimax[brow]; nrow = bilen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto b_noinsert; \
113         } \
114       } \
115       if (b->nonew == 1) goto b_noinsert; \
116       if (b->nonew == -1) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
117       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
118       N = nrow++ - 1;  \
119       /* shift up all the later entries in this row */ \
120       for (ii=N; ii>=_i; ii--) { \
121         rp[ii+1] = rp[ii]; \
122         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123       } \
124       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
125       rp[_i]                      = bcol;  \
126       ap[bs2*_i + bs*cidx + ridx] = value;  \
127       b_noinsert:; \
128     bilen[brow] = nrow; \
129 }
130 
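/*
   Note on the two macros above (illustrative sketch): within each bs x bs
   block the entries are stored column-major, so element (ridx,cidx) of block
   number _i of a block row lives at ap[bs2*_i + bs*cidx + ridx].  A worked
   example: with bs = 2 (so bs2 = 4), the entry at (ridx,cidx) = (1,0) of the
   third block (_i = 2) sits at offset 4*2 + 2*0 + 1 = 9.  The binary search
   only narrows [low,high) to at most three candidate blocks; the final
   linear scan locates (or inserts) bcol.
*/
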
131 /* Only add/insert a(i,j) with i<=j (blocks).
132    Any a(i,j) with i>j input by the user is ignored.
133 */
134 #undef __FUNCT__
135 #define __FUNCT__ "MatSetValues_MPISBAIJ"
136 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
137 {
138   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
139   MatScalar      value;
140   PetscBool      roworiented = baij->roworiented;
141   PetscErrorCode ierr;
142   PetscInt       i,j,row,col;
143   PetscInt       rstart_orig=mat->rmap->rstart;
144   PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
145   PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;
146 
147   /* Some variables required by the macros */
148   Mat            A = baij->A;
149   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
150   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
151   MatScalar      *aa=a->a;
152 
153   Mat            B = baij->B;
154   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
155   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
156   MatScalar     *ba=b->a;
157 
158   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
159   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
160   MatScalar     *ap,*bap;
161 
162   /* for stash */
163   PetscInt      n_loc, *in_loc = PETSC_NULL;
164   MatScalar     *v_loc = PETSC_NULL;
165 
166   PetscFunctionBegin;
167   if (v) PetscValidScalarPointer(v,6);
168   if (!baij->donotstash){
169     if (n > baij->n_loc) {
170       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
171       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
172       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
174       baij->n_loc = n;
175     }
176     in_loc = baij->in_loc;
177     v_loc  = baij->v_loc;
178   }
179 
180   for (i=0; i<m; i++) {
181     if (im[i] < 0) continue;
182 #if defined(PETSC_USE_DEBUG)
183     if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
184 #endif
185     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
186       row = im[i] - rstart_orig;              /* local row index */
187       for (j=0; j<n; j++) {
188         if (im[i]/bs > in[j]/bs){
189           if (a->ignore_ltriangular){
190             continue;    /* ignore lower triangular blocks */
191           } else {
192             SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
193           }
194         }
195         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
196           col = in[j] - cstart_orig;          /* local col index */
197           brow = row/bs; bcol = col/bs;
198           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
199           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
200           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
201           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
202         } else if (in[j] < 0) continue;
203 #if defined(PETSC_USE_DEBUG)
204         else if (in[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);
205 #endif
206         else {  /* off-diag entry (B) */
207           if (mat->was_assembled) {
208             if (!baij->colmap) {
209               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
210             }
211 #if defined (PETSC_USE_CTABLE)
212             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
213             col  = col - 1;
214 #else
215             col = baij->colmap[in[j]/bs] - 1;
216 #endif
217             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
218               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
219               col =  in[j];
220               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
221               B = baij->B;
222               b = (Mat_SeqBAIJ*)(B)->data;
223               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
224               ba=b->a;
225             } else col += in[j]%bs;
226           } else col = in[j];
227           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
228           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
229           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
230         }
231       }
232     } else {  /* off processor entry */
233       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
234       if (!baij->donotstash) {
235         n_loc = 0;
236         for (j=0; j<n; j++){
237           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
238           in_loc[n_loc] = in[j];
239           if (roworiented) {
240             v_loc[n_loc] = v[i*n+j];
241           } else {
242             v_loc[n_loc] = v[j*m+i];
243           }
244           n_loc++;
245         }
246         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc,PETSC_FALSE);CHKERRQ(ierr);
247       }
248     }
249   }
250   PetscFunctionReturn(0);
251 }
252 
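/*
   Usage sketch for the routine above (illustrative only; mat, i, j, v are
   hypothetical): callers supply only entries with i <= j, e.g.

     PetscInt    i = 0, j = 2;
     PetscScalar v = 1.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   To have lower triangular input dropped silently instead of triggering the
   error above, first call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE).
*/
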
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
256 {
257   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
258   const MatScalar *value;
259   MatScalar       *barray=baij->barray;
260   PetscBool       roworiented = baij->roworiented,ignore_ltriangular = ((Mat_SeqSBAIJ*)baij->A->data)->ignore_ltriangular;
261   PetscErrorCode  ierr;
262   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
263   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
264   PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;
265 
266   PetscFunctionBegin;
267   if (!barray) {
268     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
269     baij->barray = barray;
270   }
271 
272   if (roworiented) {
273     stepval = (n-1)*bs;
274   } else {
275     stepval = (m-1)*bs;
276   }
277   for (i=0; i<m; i++) {
278     if (im[i] < 0) continue;
279 #if defined(PETSC_USE_DEBUG)
280     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
281 #endif
282     if (im[i] >= rstart && im[i] < rend) {
283       row = im[i] - rstart;
284       for (j=0; j<n; j++) {
285         if (im[i] > in[j]) {
286           if (ignore_ltriangular) continue; /* ignore lower triangular blocks */
287           else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
288         }
289         /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
290         if ((roworiented) && (n == 1)) {
291           barray = (MatScalar*) v + i*bs2;
292         } else if ((!roworiented) && (m == 1)) {
293           barray = (MatScalar*) v + j*bs2;
294         } else { /* Here a copy is required */
295           if (roworiented) {
296             value = v + i*(stepval+bs)*bs + j*bs;
297           } else {
298             value = v + j*(stepval+bs)*bs + i*bs;
299           }
300           for (ii=0; ii<bs; ii++,value+=stepval) {
301             for (jj=0; jj<bs; jj++) {
302               *barray++  = *value++;
303             }
304           }
305           barray -= bs2;
306         }
307 
308         if (in[j] >= cstart && in[j] < cend){
309           col  = in[j] - cstart;
310           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
311         }
312         else if (in[j] < 0) continue;
313 #if defined(PETSC_USE_DEBUG)
314         else if (in[j] >= baij->Nbs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);
315 #endif
316         else {
317           if (mat->was_assembled) {
318             if (!baij->colmap) {
319               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
320             }
321 
322 #if defined(PETSC_USE_DEBUG)
323 #if defined (PETSC_USE_CTABLE)
324             { PetscInt data;
325               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
326               if ((data - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
327             }
328 #else
329             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Incorrect colmap");
330 #endif
331 #endif
332 #if defined (PETSC_USE_CTABLE)
333             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
334             col  = (col - 1)/bs;
335 #else
336             col = (baij->colmap[in[j]] - 1)/bs;
337 #endif
338             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col =  in[j];
341             }
342           }
343           else col = in[j];
344           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
345         }
346       }
347     } else {
348       if (mat->nooffprocentries) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Setting off process row %D even though MatSetOption(,MAT_NO_OFF_PROC_ENTRIES,PETSC_TRUE) was set",im[i]);
349       if (!baij->donotstash) {
350         if (roworiented) {
351           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
352         } else {
353           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
354         }
355       }
356     }
357   }
358   PetscFunctionReturn(0);
359 }
360 
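/*
   Sketch of blocked insertion (illustrative only; mat is hypothetical): im[]
   and in[] are BLOCK indices and v holds bs2 values per block.  For example,
   with bs = 2, inserting one 2x2 block at block position (0,1):

     PetscInt    brow = 0, bcol = 1;
     PetscScalar blk[4] = {1.0,2.0,3.0,4.0};
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,blk,INSERT_VALUES);CHKERRQ(ierr);

   The barray copy above simply repacks a strided slice of v into one
   contiguous bs2-long buffer before calling the Seq(S)BAIJ kernels.
*/
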
361 #undef __FUNCT__
362 #define __FUNCT__ "MatGetValues_MPISBAIJ"
363 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
364 {
365   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
366   PetscErrorCode ierr;
367   PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
368   PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;
369 
370   PetscFunctionBegin;
371   for (i=0; i<m; i++) {
372     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
373     if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
374     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
375       row = idxm[i] - bsrstart;
376       for (j=0; j<n; j++) {
377         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
378         if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
379         if (idxn[j] >= bscstart && idxn[j] < bscend){
380           col = idxn[j] - bscstart;
381           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
382         } else {
383           if (!baij->colmap) {
384             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
385           }
386 #if defined (PETSC_USE_CTABLE)
387           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
388           data--;
389 #else
390           data = baij->colmap[idxn[j]/bs]-1;
391 #endif
392           if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
393           else {
394             col  = data + idxn[j]%bs;
395             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
396           }
397         }
398       }
399     } else {
400       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local values currently supported");
401     }
402   }
403   PetscFunctionReturn(0);
404 }
405 
406 #undef __FUNCT__
407 #define __FUNCT__ "MatNorm_MPISBAIJ"
408 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
409 {
410   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
411   PetscErrorCode ierr;
412   PetscReal      sum[2],*lnorm2;
413 
414   PetscFunctionBegin;
415   if (baij->size == 1) {
416     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
417   } else {
418     if (type == NORM_FROBENIUS) {
419       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
420       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
421       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;             /* square of norm(A) */
422       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
423       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
424       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
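      /* B stores each off-diagonal block only once, while it appears twice in
         the symmetric matrix; hence the factor of 2 on sum[1] below */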
425       *norm = sqrt(sum[0] + 2*sum[1]);
426       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
427     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
428       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
429       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
430       PetscReal    *rsum,*rsum2,vabs;
431       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
432       PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
433       MatScalar    *v;
434 
435       ierr  = PetscMalloc2(mat->cmap->N,PetscReal,&rsum,mat->cmap->N,PetscReal,&rsum2);CHKERRQ(ierr);
436       ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
437       /* Amat */
438       v = amat->a; jj = amat->j;
439       for (brow=0; brow<mbs; brow++) {
440         grow = bs*(rstart + brow);
441         nz = amat->i[brow+1] - amat->i[brow];
442         for (bcol=0; bcol<nz; bcol++){
443           gcol = bs*(rstart + *jj); jj++;
444           for (col=0; col<bs; col++){
445             for (row=0; row<bs; row++){
446               vabs = PetscAbsScalar(*v); v++;
447               rsum[gcol+col] += vabs;
448               /* non-diagonal block */
449               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
450             }
451           }
452         }
453       }
454       /* Bmat */
455       v = bmat->a; jj = bmat->j;
456       for (brow=0; brow<mbs; brow++) {
457         grow = bs*(rstart + brow);
458         nz = bmat->i[brow+1] - bmat->i[brow];
459         for (bcol=0; bcol<nz; bcol++){
460           gcol = bs*garray[*jj]; jj++;
461           for (col=0; col<bs; col++){
462             for (row=0; row<bs; row++){
463               vabs = PetscAbsScalar(*v); v++;
464               rsum[gcol+col] += vabs;
465               rsum[grow+row] += vabs;
466             }
467           }
468         }
469       }
470       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPIU_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
471       *norm = 0.0;
472       for (col=0; col<mat->cmap->N; col++) {
473         if (rsum2[col] > *norm) *norm = rsum2[col];
474       }
475       ierr = PetscFree2(rsum,rsum2);CHKERRQ(ierr);
476     } else {
477       SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"No support for this norm yet");
478     }
479   }
480   PetscFunctionReturn(0);
481 }
482 
483 #undef __FUNCT__
484 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
485 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
486 {
487   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
488   PetscErrorCode ierr;
489   PetscInt       nstash,reallocs;
490   InsertMode     addv;
491 
492   PetscFunctionBegin;
493   if (baij->donotstash || mat->nooffprocentries) {
494     PetscFunctionReturn(0);
495   }
496 
497   /* make sure all processes use the same InsertMode: either INSERT_VALUES or ADD_VALUES */
498   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
499   if (addv == (ADD_VALUES|INSERT_VALUES)) SETERRQ(((PetscObject)mat)->comm,PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
500   mat->insertmode = addv; /* in case this processor had no cache */
501 
502   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
503   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
504   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
505   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
506   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
507   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
508   PetscFunctionReturn(0);
509 }
510 
511 #undef __FUNCT__
512 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
513 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
514 {
515   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
516   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
517   PetscErrorCode ierr;
518   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
519   PetscInt       *row,*col;
520   PetscBool      other_disassembled;
521   PetscMPIInt    n;
522   PetscBool      r1,r2,r3;
523   MatScalar      *val;
524   InsertMode     addv = mat->insertmode;
525 
526   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
527   PetscFunctionBegin;
528 
529   if (!baij->donotstash &&  !mat->nooffprocentries) {
530     while (1) {
531       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
532       if (!flg) break;
533 
534       for (i=0; i<n;) {
535         /* Now identify the consecutive vals belonging to the same row */
536         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
537         if (j < n) ncols = j-i;
538         else       ncols = n-i;
539         /* Now assemble all these values with a single function call */
540         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
541         i = j;
542       }
543     }
544     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
545     /* Now process the block-stash. Since the values are stashed column-oriented,
546        temporarily clear the row-oriented flags, and after MatSetValuesBlocked()
547        restore the original flags */
548     r1 = baij->roworiented;
549     r2 = a->roworiented;
550     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
551     baij->roworiented = PETSC_FALSE;
552     a->roworiented    = PETSC_FALSE;
553     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
554     while (1) {
555       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
556       if (!flg) break;
557 
558       for (i=0; i<n;) {
559         /* Now identify the consecutive vals belonging to the same row */
560         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
561         if (j < n) ncols = j-i;
562         else       ncols = n-i;
563         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
564         i = j;
565       }
566     }
567     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
568     baij->roworiented = r1;
569     a->roworiented    = r2;
570     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
571   }
572 
573   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
574   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
575 
576   /* determine if any processor has disassembled; if so, we must
577      also disassemble ourselves so that we may reassemble. */
578   /*
579      if the nonzero structure of submatrix B cannot change, then we know that
580      no processor disassembled, so we can skip this step
581   */
582   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
583     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
584     if (mat->was_assembled && !other_disassembled) {
585       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
586     }
587   }
588 
589   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
590     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
591   }
592   ierr = MatSetOption(baij->B,MAT_CHECK_COMPRESSED_ROW,PETSC_TRUE);CHKERRQ(ierr);
593   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
594   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
595 
596   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
597   baij->rowvalues = 0;
598 
599   PetscFunctionReturn(0);
600 }
601 
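/*
   Assembly sketch (illustrative only; mat, i, j, v are hypothetical): entries
   cached for other processes by MatSetValues()/MatSetValuesBlocked() are
   communicated and inserted by the begin/end pair implemented above, which
   also builds the scatter contexts (Mvctx, sMvctx) on first final assembly:

     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/
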
602 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
603 #undef __FUNCT__
604 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
605 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
606 {
607   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
608   PetscErrorCode    ierr;
609   PetscInt          bs = mat->rmap->bs;
610   PetscMPIInt       size = baij->size,rank = baij->rank;
611   PetscBool         iascii,isdraw;
612   PetscViewer       sviewer;
613   PetscViewerFormat format;
614 
615   PetscFunctionBegin;
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
617   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
618   if (iascii) {
619     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
620     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
621       MatInfo info;
622       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
623       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
624       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_TRUE);CHKERRQ(ierr);
625       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",rank,mat->rmap->n,(PetscInt)info.nz_used,(PetscInt)info.nz_allocated,mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIISynchronizedAllow(viewer,PETSC_FALSE);CHKERRQ(ierr);
632       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
633       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
634       PetscFunctionReturn(0);
635     } else if (format == PETSC_VIEWER_ASCII_INFO) {
636       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
637       PetscFunctionReturn(0);
638     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
639       PetscFunctionReturn(0);
640     }
641   }
642 
643   if (isdraw) {
644     PetscDraw  draw;
645     PetscBool  isnull;
646     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
647     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
648   }
649 
650   if (size == 1) {
651     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
652     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
653   } else {
654     /* assemble the entire matrix onto first processor. */
655     Mat          A;
656     Mat_SeqSBAIJ *Aloc;
657     Mat_SeqBAIJ  *Bloc;
658     PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
659     MatScalar    *a;
660 
661     /* Should this be the same type as mat? */
662     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
663     if (!rank) {
664       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
665     } else {
666       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
667     }
668     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
669     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
670     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
671 
672     /* copy over the A part */
673     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
674     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
675     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
676 
677     for (i=0; i<mbs; i++) {
678       rvals[0] = bs*(baij->rstartbs + i);
679       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
680       for (j=ai[i]; j<ai[i+1]; j++) {
681         col = (baij->cstartbs+aj[j])*bs;
682         for (k=0; k<bs; k++) {
683           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
684           col++; a += bs;
685         }
686       }
687     }
688     /* copy over the B part */
689     Bloc = (Mat_SeqBAIJ*)baij->B->data;
690     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
691     for (i=0; i<mbs; i++) {
692 
693       rvals[0] = bs*(baij->rstartbs + i);
694       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
695       for (j=ai[i]; j<ai[i+1]; j++) {
696         col = baij->garray[aj[j]]*bs;
697         for (k=0; k<bs; k++) {
698           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
699           col++; a += bs;
700         }
701       }
702     }
703     ierr = PetscFree(rvals);CHKERRQ(ierr);
704     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
706     /*
707        Everyone has to call to draw the matrix since the graphics waits are
708        synchronized across all processors that share the PetscDraw object
709     */
710     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
711     if (!rank) {
712       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
713       /* Set the type name to MATMPISBAIJ so that the correct type can be printed out by PetscObjectPrintClassNamePrefixType() in MatView_SeqSBAIJ_ASCII() */
714       ierr = PetscStrcpy(((PetscObject)((Mat_MPISBAIJ*)(A->data))->A)->type_name,MATMPISBAIJ);CHKERRQ(ierr);
715       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
716     }
717     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
718     ierr = MatDestroy(&A);CHKERRQ(ierr);
719   }
720   PetscFunctionReturn(0);
721 }
722 
723 #undef __FUNCT__
724 #define __FUNCT__ "MatView_MPISBAIJ"
725 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
726 {
727   PetscErrorCode ierr;
728   PetscBool      iascii,isdraw,issocket,isbinary;
729 
730   PetscFunctionBegin;
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERASCII,&iascii);CHKERRQ(ierr);
732   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERDRAW,&isdraw);CHKERRQ(ierr);
733   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERSOCKET,&issocket);CHKERRQ(ierr);
734   ierr = PetscTypeCompare((PetscObject)viewer,PETSCVIEWERBINARY,&isbinary);CHKERRQ(ierr);
735   if (iascii || isdraw || issocket || isbinary) {
736     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
737   } else {
738     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
739   }
740   PetscFunctionReturn(0);
741 }
742 
743 #undef __FUNCT__
744 #define __FUNCT__ "MatDestroy_MPISBAIJ"
745 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
746 {
747   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
748   PetscErrorCode ierr;
749 
750   PetscFunctionBegin;
751 #if defined(PETSC_USE_LOG)
752   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
753 #endif
754   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
755   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
756   ierr = MatDestroy(&baij->A);CHKERRQ(ierr);
757   ierr = MatDestroy(&baij->B);CHKERRQ(ierr);
758 #if defined (PETSC_USE_CTABLE)
759   ierr = PetscTableDestroy(&baij->colmap);CHKERRQ(ierr);
760 #else
761   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
762 #endif
763   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
764   ierr = VecDestroy(&baij->lvec);CHKERRQ(ierr);
765   ierr = VecScatterDestroy(&baij->Mvctx);CHKERRQ(ierr);
766   ierr = VecDestroy(&baij->slvec0);CHKERRQ(ierr);
767   ierr = VecDestroy(&baij->slvec0b);CHKERRQ(ierr);
768   ierr = VecDestroy(&baij->slvec1);CHKERRQ(ierr);
769   ierr = VecDestroy(&baij->slvec1a);CHKERRQ(ierr);
770   ierr = VecDestroy(&baij->slvec1b);CHKERRQ(ierr);
771   ierr = VecScatterDestroy(&baij->sMvctx);CHKERRQ(ierr);
772   ierr = PetscFree2(baij->rowvalues,baij->rowindices);CHKERRQ(ierr);
773   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
774   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
775   ierr = VecDestroy(&baij->diag);CHKERRQ(ierr);
776   ierr = VecDestroy(&baij->bb1);CHKERRQ(ierr);
777   ierr = VecDestroy(&baij->xx1);CHKERRQ(ierr);
778 #if defined(PETSC_USE_REAL_MAT_SINGLE)
779   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
780 #endif
781   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
782   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
783   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
784   ierr = PetscFree(mat->data);CHKERRQ(ierr);
785 
786   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
789   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
790   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
791   PetscFunctionReturn(0);
792 }
793 
794 #undef __FUNCT__
795 #define __FUNCT__ "MatMult_MPISBAIJ_Hermitian"
796 PetscErrorCode MatMult_MPISBAIJ_Hermitian(Mat A,Vec xx,Vec yy)
797 {
798   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
799   PetscErrorCode ierr;
800   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
801   PetscScalar    *x,*from;
802 
803   PetscFunctionBegin;
804   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
805   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
806 
807   /* diagonal part */
808   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
809   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
810 
811   /* subdiagonal part */
812   ierr = (*a->B->ops->multhermitiantranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
813 
814   /* copy x into the vec slvec0 */
815   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
816   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
817 
818   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
819   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824   /* superdiagonal part */
825   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMult_MPISBAIJ"
831 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
832 {
833   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
834   PetscErrorCode ierr;
835   PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
836   PetscScalar    *x,*from;
837 
838   PetscFunctionBegin;
839   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
840   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
841 
842   /* diagonal part */
843   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
844   ierr = VecSet(a->slvec1b,0.0);CHKERRQ(ierr);
845 
846   /* subdiagonal part */
847   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
848 
849   /* copy x into the vec slvec0 */
850   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
851   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
852 
853   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
854   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
855   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
856 
857   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
858   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
859   /* superdiagonal part */
860   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
861   PetscFunctionReturn(0);
862 }
863 
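/*
   Note on the two MatMult routines above (sketch): with x_d the locally owned
   part of x, A the diagonal block (SeqSBAIJ, upper triangle stored) and B the
   upper off-diagonal block (SeqBAIJ), the owned part of y = mat*x is

      y = A*x_d + B*x_ghost + (contributions of B^T*x_d scattered back)

   Packing x_d and B^T*x_d together into slvec0 lets a single forward scatter
   with ADD_VALUES deliver both the ghost values (into slvec1b) and the
   transpose contributions (added into slvec1a, which already holds A*x_d).
*/
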
864 #undef __FUNCT__
865 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
866 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
867 {
868   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
869   PetscErrorCode ierr;
870   PetscInt       nt;
871 
872   PetscFunctionBegin;
873   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
874   if (nt != A->cmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
875 
876   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
877   if (nt != A->rmap->n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
878 
879   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
880   /* do diagonal part */
881   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
882   /* do superdiagonal part */
883   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
884   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
885   /* do subdiagonal part */
886   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
887   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
888   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
892 
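/*
   The _2comm variant above computes the same product with two communication
   phases: a forward scatter for the ghost values followed by a reverse
   ADD_VALUES scatter for the B^T contributions, instead of the single
   combined scatter used by MatMult_MPISBAIJ.
*/
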
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
895 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
896 {
897   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       mbs=a->mbs,bs=A->rmap->bs;
900   PetscScalar    *x,*from,zero=0.0;
901 
902   PetscFunctionBegin;
903   /*
904   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
905   PetscSynchronizedFlush(((PetscObject)A)->comm);
906   */
907   /* diagonal part */
908   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
909   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
910 
911   /* subdiagonal part */
912   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
913 
914   /* copy x into the vec slvec0 */
915   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
916   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
917   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
918   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
919 
920   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
921   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
922   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923 
924   /* superdiagonal part */
925   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
926 
927   PetscFunctionReturn(0);
928 }
929 
930 #undef __FUNCT__
931 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
932 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
933 {
934   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
935   PetscErrorCode ierr;
936 
937   PetscFunctionBegin;
938   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
939   /* do diagonal part */
940   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
941   /* do superdiagonal part */
942   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
943   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
944 
945   /* do subdiagonal part */
946   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
947   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
948   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
949 
950   PetscFunctionReturn(0);
951 }
952 
953 /*
954   This only works correctly for square matrices where the subblock A->A is the
955    diagonal block
956 */
957 #undef __FUNCT__
958 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
959 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
960 {
961   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
962   PetscErrorCode ierr;
963 
964   PetscFunctionBegin;
965   /* if (A->rmap->N != A->cmap->N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
966   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
967   PetscFunctionReturn(0);
968 }
969 
970 #undef __FUNCT__
971 #define __FUNCT__ "MatScale_MPISBAIJ"
972 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
973 {
974   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
975   PetscErrorCode ierr;
976 
977   PetscFunctionBegin;
978   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
979   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatGetRow_MPISBAIJ"
985 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
986 {
987   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
988   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
989   PetscErrorCode ierr;
990   PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
991   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
992   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
993 
994   PetscFunctionBegin;
995   if (mat->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Already active");
996   mat->getrowactive = PETSC_TRUE;
997 
998   if (!mat->rowvalues && (idx || v)) {
999     /*
1000         allocate enough space to hold information from the longest row.
1001     */
1002     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1003     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1004     PetscInt     max = 1,mbs = mat->mbs,tmp;
1005     for (i=0; i<mbs; i++) {
1006       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1007       if (max < tmp) { max = tmp; }
1008     }
1009     ierr = PetscMalloc2(max*bs2,PetscScalar,&mat->rowvalues,max*bs2,PetscInt,&mat->rowindices);CHKERRQ(ierr);
1010   }
1011 
1012   if (row < brstart || row >= brend) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Only local rows");
1013   lrow = row - brstart;  /* local row index */
1014 
1015   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1016   if (!v)   {pvA = 0; pvB = 0;}
1017   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1018   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1019   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1020   nztot = nzA + nzB;
1021 
1022   cmap  = mat->garray;
1023   if (v  || idx) {
1024     if (nztot) {
1025       /* Sort by increasing column numbers, assuming A and B already sorted */
1026       PetscInt imark = -1;
1027       if (v) {
1028         *v = v_p = mat->rowvalues;
1029         for (i=0; i<nzB; i++) {
1030           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1031           else break;
1032         }
1033         imark = i;
1034         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1035         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1036       }
1037       if (idx) {
1038         *idx = idx_p = mat->rowindices;
1039         if (imark > -1) {
1040           for (i=0; i<imark; i++) {
1041             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1042           }
1043         } else {
1044           for (i=0; i<nzB; i++) {
1045             if (cmap[cworkB[i]/bs] < cstart)
1046               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1047             else break;
1048           }
1049           imark = i;
1050         }
1051         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1052         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1053       }
1054     } else {
1055       if (idx) *idx = 0;
1056       if (v)   *v   = 0;
1057     }
1058   }
1059   *nz = nztot;
1060   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1061   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1067 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1068 {
1069   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1070 
1071   PetscFunctionBegin;
1072   if (!baij->getrowactive) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1073   baij->getrowactive = PETSC_FALSE;
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1079 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1080 {
1081   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1082   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1083 
1084   PetscFunctionBegin;
1085   aA->getrow_utriangular = PETSC_TRUE;
1086   PetscFunctionReturn(0);
1087 }
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1090 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1091 {
1092   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1093   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1094 
1095   PetscFunctionBegin;
1096   aA->getrow_utriangular = PETSC_FALSE;
1097   PetscFunctionReturn(0);
1098 }
1099 
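/*
   Usage sketch (illustrative only; mat and row are hypothetical): for SBAIJ
   matrices, row access must be bracketed by the upper-triangular access pair
   implemented above, and only local rows may be requested:

     PetscInt          ncols;
     const PetscInt    *cols;
     const PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRow(mat,row,&ncols,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/
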
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1102 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1103 {
1104   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1105   PetscErrorCode ierr;
1106 
1107   PetscFunctionBegin;
1108   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1109   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1110   PetscFunctionReturn(0);
1111 }
1112 
1113 #undef __FUNCT__
1114 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1115 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1116 {
1117   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1118   PetscErrorCode ierr;
1119 
1120   PetscFunctionBegin;
1121   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1122   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1123   PetscFunctionReturn(0);
1124 }
1125 
1126 #undef __FUNCT__
1127 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1128 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1129 {
1130   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1131   PetscErrorCode ierr;
1132 
1133   PetscFunctionBegin;
1134   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1135   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1136   PetscFunctionReturn(0);
1137 }
1138 
1139 #undef __FUNCT__
1140 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1141 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1142 {
1143   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1144   Mat            A = a->A,B = a->B;
1145   PetscErrorCode ierr;
1146   PetscReal      isend[5],irecv[5];
1147 
1148   PetscFunctionBegin;
1149   info->block_size     = (PetscReal)matin->rmap->bs;
1150   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1151   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1152   isend[3] = info->memory;  isend[4] = info->mallocs;
1153   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1155   isend[3] += info->memory;  isend[4] += info->mallocs;
1156   if (flag == MAT_LOCAL) {
1157     info->nz_used      = isend[0];
1158     info->nz_allocated = isend[1];
1159     info->nz_unneeded  = isend[2];
1160     info->memory       = isend[3];
1161     info->mallocs      = isend[4];
1162   } else if (flag == MAT_GLOBAL_MAX) {
1163     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1164     info->nz_used      = irecv[0];
1165     info->nz_allocated = irecv[1];
1166     info->nz_unneeded  = irecv[2];
1167     info->memory       = irecv[3];
1168     info->mallocs      = irecv[4];
1169   } else if (flag == MAT_GLOBAL_SUM) {
1170     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPIU_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else {
1177     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1178   }
1179   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1180   info->fill_ratio_needed = 0;
1181   info->factor_mallocs    = 0;
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1187 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscBool  flg)
1188 {
1189   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1190   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1191   PetscErrorCode ierr;
1192 
1193   PetscFunctionBegin;
1194   switch (op) {
1195   case MAT_NEW_NONZERO_LOCATIONS:
1196   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1197   case MAT_UNUSED_NONZERO_LOCATION_ERR:
1198   case MAT_KEEP_NONZERO_PATTERN:
1199   case MAT_NEW_NONZERO_LOCATION_ERR:
1200     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1201     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1202     break;
1203   case MAT_ROW_ORIENTED:
1204     a->roworiented = flg;
1205     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1206     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1207     break;
1208   case MAT_NEW_DIAGONALS:
1209     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1210     break;
1211   case MAT_IGNORE_OFF_PROC_ENTRIES:
1212     a->donotstash = flg;
1213     break;
1214   case MAT_USE_HASH_TABLE:
1215     a->ht_flag = flg;
1216     break;
1217   case MAT_HERMITIAN:
1218     if (!A->assembled) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONGSTATE,"Must call MatAssemblyEnd() first");
1219     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1220     A->ops->mult = MatMult_MPISBAIJ_Hermitian;
1221     break;
1222   case MAT_SPD:
1223     A->spd_set                         = PETSC_TRUE;
1224     A->spd                             = flg;
1225     if (flg) {
1226       A->symmetric                     = PETSC_TRUE;
1227       A->structurally_symmetric        = PETSC_TRUE;
1228       A->symmetric_set                 = PETSC_TRUE;
1229       A->structurally_symmetric_set    = PETSC_TRUE;
1230     }
1231     break;
1232   case MAT_SYMMETRIC:
1233     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1234     break;
1235   case MAT_STRUCTURALLY_SYMMETRIC:
1236     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1237     break;
1238   case MAT_SYMMETRY_ETERNAL:
1239     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Matrix must be symmetric");
1240     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1241     break;
1242   case MAT_IGNORE_LOWER_TRIANGULAR:
1243     aA->ignore_ltriangular = flg;
1244     break;
1245   case MAT_ERROR_LOWER_TRIANGULAR:
1246     aA->ignore_ltriangular = flg;
1247     break;
1248   case MAT_GETROW_UPPERTRIANGULAR:
1249     aA->getrow_utriangular = flg;
1250     break;
1251   default:
1252     SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Unknown option %d",op);
1253   }
1254   PetscFunctionReturn(0);
1255 }
1256 
1257 #undef __FUNCT__
1258 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1259 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1260 {
1261   PetscErrorCode ierr;
1262   PetscFunctionBegin;
1263   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1264     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1265   }
1266   PetscFunctionReturn(0);
1267 }
1268 
1269 #undef __FUNCT__
1270 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1271 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1272 {
1273   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1274   Mat            a=baij->A, b=baij->B;
1275   PetscErrorCode ierr;
1276   PetscInt       nv,m,n;
1277   PetscBool      flg;
1278 
1279   PetscFunctionBegin;
1280   if (ll != rr){
1281     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1282     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1283   }
1284   if (!ll) PetscFunctionReturn(0);
1285 
1286   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1287   if (m != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1288 
1289   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1290   if (nv != n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1291 
1292   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1293 
1294   /* left diagonalscale the off-diagonal part */
1295   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1296 
1297   /* scale the diagonal part */
1298   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1299 
1300   /* right diagonalscale the off-diagonal part */
1301   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1302   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1303   PetscFunctionReturn(0);
1304 }
1305 
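/*
   Note on the scaling above: only a symmetric scaling D*mat*D is representable
   in this format, hence the requirement ll == rr.  The diagonal block is
   scaled on both sides, while the off-diagonal block B gets ll on the left
   and the scattered rr values (baij->lvec) on the right.
*/
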
1306 #undef __FUNCT__
1307 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1308 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1309 {
1310   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1311   PetscErrorCode ierr;
1312 
1313   PetscFunctionBegin;
1314   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatEqual_MPISBAIJ"
1322 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscBool  *flag)
1323 {
1324   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1325   Mat            a,b,c,d;
1326   PetscBool      flg;
1327   PetscErrorCode ierr;
1328 
1329   PetscFunctionBegin;
1330   a = matA->A; b = matA->B;
1331   c = matB->A; d = matB->B;
1332 
1333   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1334   if (flg) {
1335     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1336   }
1337   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatCopy_MPISBAIJ"
1343 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1344 {
1345   PetscErrorCode ierr;
1346   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1347   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1348 
1349   PetscFunctionBegin;
1350   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1351   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1352     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1353     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1354     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1355   } else {
1356     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1357     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1358   }
1359   PetscFunctionReturn(0);
1360 }
1361 
1362 #undef __FUNCT__
1363 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1364 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1365 {
1366   PetscErrorCode ierr;
1367 
1368   PetscFunctionBegin;
1369   ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,PETSC_NULL,PETSC_DEFAULT,PETSC_NULL);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1375 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1376 {
1377   PetscErrorCode ierr;
1378   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1379   PetscBLASInt   bnz,one=1;
1380   Mat_SeqSBAIJ   *xa,*ya;
1381   Mat_SeqBAIJ    *xb,*yb;
1382 
1383   PetscFunctionBegin;
1384   if (str == SAME_NONZERO_PATTERN) {
1385     PetscScalar alpha = a;
1386     xa = (Mat_SeqSBAIJ *)xx->A->data;
1387     ya = (Mat_SeqSBAIJ *)yy->A->data;
1388     bnz = PetscBLASIntCast(xa->nz);
1389     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1390     xb = (Mat_SeqBAIJ *)xx->B->data;
1391     yb = (Mat_SeqBAIJ *)yy->B->data;
1392     bnz = PetscBLASIntCast(xb->nz);
1393     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1394   } else {
1395     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1396     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1397     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1398   }
1399   PetscFunctionReturn(0);
1400 }
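
/*
   Usage sketch for the fast path above (illustrative; X and Y are assumed to
   be assembled MATMPISBAIJ matrices with identical nonzero structure):

      ierr = MatAXPY(Y,2.0,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);

   With SAME_NONZERO_PATTERN the update reduces to two BLAS axpy calls on the
   stored arrays of the diagonal and off-diagonal blocks; any other
   MatStructure falls back to the much slower MatAXPY_Basic().
*/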
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatSetBlockSize_MPISBAIJ"
1404 PetscErrorCode MatSetBlockSize_MPISBAIJ(Mat A,PetscInt bs)
1405 {
1406   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1407   PetscInt        rbs,cbs;
1408   PetscErrorCode  ierr;
1409 
1410   PetscFunctionBegin;
1411   ierr = MatSetBlockSize(a->A,bs);CHKERRQ(ierr);
1412   ierr = MatSetBlockSize(a->B,bs);CHKERRQ(ierr);
1413   ierr = PetscLayoutGetBlockSize(A->rmap,&rbs);CHKERRQ(ierr);
1414   ierr = PetscLayoutGetBlockSize(A->cmap,&cbs);CHKERRQ(ierr);
1415   if (rbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D, but SBAIJ row layout already has block size %D",bs,rbs);
1416   if (cbs != bs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Attempt to set block size %D, but SBAIJ column layout already has block size %D",bs,cbs);
1417   PetscFunctionReturn(0);
1418 }
1419 
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1422 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1423 {
1424   PetscErrorCode ierr;
1425   PetscInt       i;
1426   PetscBool      flg;
1427 
1428   PetscFunctionBegin;
1429   for (i=0; i<n; i++) {
1430     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1431     if (!flg) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1432   }
1433   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1434   PetscFunctionReturn(0);
1435 }
1436 
1437 
1438 /* -------------------------------------------------------------------*/
1439 static struct _MatOps MatOps_Values = {
1440        MatSetValues_MPISBAIJ,
1441        MatGetRow_MPISBAIJ,
1442        MatRestoreRow_MPISBAIJ,
1443        MatMult_MPISBAIJ,
1444 /* 4*/ MatMultAdd_MPISBAIJ,
1445        MatMult_MPISBAIJ,       /* transpose versions are the same as non-transpose since the matrix is symmetric */
1446        MatMultAdd_MPISBAIJ,
1447        0,
1448        0,
1449        0,
1450 /*10*/ 0,
1451        0,
1452        0,
1453        MatSOR_MPISBAIJ,
1454        MatTranspose_MPISBAIJ,
1455 /*15*/ MatGetInfo_MPISBAIJ,
1456        MatEqual_MPISBAIJ,
1457        MatGetDiagonal_MPISBAIJ,
1458        MatDiagonalScale_MPISBAIJ,
1459        MatNorm_MPISBAIJ,
1460 /*20*/ MatAssemblyBegin_MPISBAIJ,
1461        MatAssemblyEnd_MPISBAIJ,
1462        MatSetOption_MPISBAIJ,
1463        MatZeroEntries_MPISBAIJ,
1464 /*24*/ 0,
1465        0,
1466        0,
1467        0,
1468        0,
1469 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1470        0,
1471        0,
1472        0,
1473        0,
1474 /*34*/ MatDuplicate_MPISBAIJ,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*39*/ MatAXPY_MPISBAIJ,
1480        MatGetSubMatrices_MPISBAIJ,
1481        MatIncreaseOverlap_MPISBAIJ,
1482        MatGetValues_MPISBAIJ,
1483        MatCopy_MPISBAIJ,
1484 /*44*/ 0,
1485        MatScale_MPISBAIJ,
1486        0,
1487        0,
1488        0,
1489 /*49*/ MatSetBlockSize_MPISBAIJ,
1490        0,
1491        0,
1492        0,
1493        0,
1494 /*54*/ 0,
1495        0,
1496        MatSetUnfactored_MPISBAIJ,
1497        0,
1498        MatSetValuesBlocked_MPISBAIJ,
1499 /*59*/ 0,
1500        0,
1501        0,
1502        0,
1503        0,
1504 /*64*/ 0,
1505        0,
1506        0,
1507        0,
1508        0,
1509 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1510        0,
1511        0,
1512        0,
1513        0,
1514 /*74*/ 0,
1515        0,
1516        0,
1517        0,
1518        0,
1519 /*79*/ 0,
1520        0,
1521        0,
1522        0,
1523        MatLoad_MPISBAIJ,
1524 /*84*/ 0,
1525        0,
1526        0,
1527        0,
1528        0,
1529 /*89*/ 0,
1530        0,
1531        0,
1532        0,
1533        0,
1534 /*94*/ 0,
1535        0,
1536        0,
1537        0,
1538        0,
1539 /*99*/ 0,
1540        0,
1541        0,
1542        0,
1543        0,
1544 /*104*/0,
1545        MatRealPart_MPISBAIJ,
1546        MatImaginaryPart_MPISBAIJ,
1547        MatGetRowUpperTriangular_MPISBAIJ,
1548        MatRestoreRowUpperTriangular_MPISBAIJ,
1549 /*109*/0,
1550        0,
1551        0,
1552        0,
1553        0,
1554 /*114*/0,
1555        0,
1556        0,
1557        0,
1558        0,
1559 /*119*/0,
1560        0,
1561        0,
1562        0
1563 };
1564 
1565 
1566 EXTERN_C_BEGIN
1567 #undef __FUNCT__
1568 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1569 PetscErrorCode  MatGetDiagonalBlock_MPISBAIJ(Mat A,Mat *a)
1570 {
1571   PetscFunctionBegin;
1572   *a = ((Mat_MPISBAIJ *)A->data)->A;
1573   PetscFunctionReturn(0);
1574 }
1575 EXTERN_C_END
1576 
1577 EXTERN_C_BEGIN
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1580 PetscErrorCode  MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1581 {
1582   Mat_MPISBAIJ   *b;
1583   PetscErrorCode ierr;
1584   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1585 
1586   PetscFunctionBegin;
1587   if (bs < 0){
1588     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1589       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1590     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1591     bs   = PetscAbs(bs);
1592   }
1593   if ((d_nnz || o_nnz) && newbs != bs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1594   bs = newbs;
1595 
1596   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1597   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1598   if (d_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1599   if (o_nz < 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1600 
1601   B->rmap->bs = B->cmap->bs = bs;
1602   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1603   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1604 
1605   if (d_nnz) {
1606     for (i=0; i<B->rmap->n/bs; i++) {
1607       if (d_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1608     }
1609   }
1610   if (o_nnz) {
1611     for (i=0; i<B->rmap->n/bs; i++) {
1612       if (o_nnz[i] < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1613     }
1614   }
1615 
1616   b   = (Mat_MPISBAIJ*)B->data;
1617   mbs = B->rmap->n/bs;
1618   Mbs = B->rmap->N/bs;
1619   if (mbs*bs != B->rmap->n) {
1620     SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1621   }
1622 
1623   B->rmap->bs  = bs;
1624   b->bs2 = bs*bs;
1625   b->mbs = mbs;
1626   b->nbs = mbs;
1627   b->Mbs = Mbs;
1628   b->Nbs = Mbs;
1629 
1630   for (i=0; i<=b->size; i++) {
1631     b->rangebs[i] = B->rmap->range[i]/bs;
1632   }
1633   b->rstartbs = B->rmap->rstart/bs;
1634   b->rendbs   = B->rmap->rend/bs;
1635 
1636   b->cstartbs = B->cmap->rstart/bs;
1637   b->cendbs   = B->cmap->rend/bs;
1638 
1639   if (!B->preallocated) {
1640     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1641     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1642     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1643     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1644     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1645     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1646     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1647     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1648     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1649   }
1650 
1651   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1652   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1653   B->preallocated = PETSC_TRUE;
1654   PetscFunctionReturn(0);
1655 }
1656 EXTERN_C_END
1657 
1658 EXTERN_C_BEGIN
1659 #undef __FUNCT__
1660 #define __FUNCT__ "MatMPISBAIJSetPreallocationCSR_MPISBAIJ"
1661 PetscErrorCode MatMPISBAIJSetPreallocationCSR_MPISBAIJ(Mat B,PetscInt bs,const PetscInt ii[],const PetscInt jj[],const PetscScalar V[])
1662 {
1663   PetscInt       m,rstart,cstart,cend;
1664   PetscInt       i,j,d,nz,nz_max=0,*d_nnz=0,*o_nnz=0;
1665   const PetscInt *JJ=0;
1666   PetscScalar    *values=0;
1667   PetscErrorCode ierr;
1668 
1669   PetscFunctionBegin;
1670 
1671   if (bs < 1) SETERRQ1(((PetscObject)B)->comm,PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive but it is %D",bs);
1672   ierr = PetscLayoutSetBlockSize(B->rmap,bs);CHKERRQ(ierr);
1673   ierr = PetscLayoutSetBlockSize(B->cmap,bs);CHKERRQ(ierr);
1674   ierr = PetscLayoutSetUp(B->rmap);CHKERRQ(ierr);
1675   ierr = PetscLayoutSetUp(B->cmap);CHKERRQ(ierr);
1676   m      = B->rmap->n/bs;
1677   rstart = B->rmap->rstart/bs;
1678   cstart = B->cmap->rstart/bs;
1679   cend   = B->cmap->rend/bs;
1680 
1681   if (ii[0]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"ii[0] must be 0 but it is %D",ii[0]);
1682   ierr  = PetscMalloc2(m,PetscInt,&d_nnz,m,PetscInt,&o_nnz);CHKERRQ(ierr);
1683   for (i=0; i<m; i++) {
1684     nz = ii[i+1] - ii[i];
1685     if (nz < 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"Local row %D has a negative number of columns %D",i,nz);
1686     nz_max = PetscMax(nz_max,nz);
1687     JJ  = jj + ii[i];
1688     for (j=0; j<nz; j++) {
1689       if (*JJ >= cstart) break;
1690       JJ++;
1691     }
1692     d = 0;
1693     for (; j<nz; j++) {
1694       if (*JJ++ >= cend) break;
1695       d++;
1696     }
1697     d_nnz[i] = d;
1698     o_nnz[i] = nz - d;
1699   }
1700   ierr = MatMPISBAIJSetPreallocation(B,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
1701   ierr = PetscFree2(d_nnz,o_nnz);CHKERRQ(ierr);
1702 
1703   values = (PetscScalar*)V;
1704   if (!values) {
1705     ierr = PetscMalloc(bs*bs*nz_max*sizeof(PetscScalar),&values);CHKERRQ(ierr);
1706     ierr = PetscMemzero(values,bs*bs*nz_max*sizeof(PetscScalar));CHKERRQ(ierr);
1707   }
1708   for (i=0; i<m; i++) {
1709     PetscInt          row    = i + rstart;
1710     PetscInt          ncols  = ii[i+1] - ii[i];
1711     const PetscInt    *icols = jj + ii[i];
1712     const PetscScalar *svals = values + (V ? (bs*bs*ii[i]) : 0);
1713     ierr = MatSetValuesBlocked_MPISBAIJ(B,1,&row,ncols,icols,svals,INSERT_VALUES);CHKERRQ(ierr);
1714   }
1715 
1716   if (!V) { ierr = PetscFree(values);CHKERRQ(ierr); }
1717   ierr = MatAssemblyBegin(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1718   ierr = MatAssemblyEnd(B,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1719 
1720   PetscFunctionReturn(0);
1721 }
1722 EXTERN_C_END
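
/*
   CSR preallocation sketch (hypothetical data): each process passes its local
   block rows in CSR form using global block column indices; for SBAIJ only
   blocks in the upper triangle (including the diagonal block) should appear.
   For example, on the process owning global block row 0 of a matrix with
   block size 2, where that row has its diagonal block and one block in
   global block column 3:

      PetscInt ii[2] = {0,2};
      PetscInt jj[2] = {0,3};
      ierr = MatMPISBAIJSetPreallocationCSR(B,2,ii,jj,PETSC_NULL);CHKERRQ(ierr);

   Passing PETSC_NULL for the values inserts zeros, as the code above shows.
*/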
1723 
1724 EXTERN_C_BEGIN
1725 #if defined(PETSC_HAVE_MUMPS)
1726 extern PetscErrorCode  MatGetFactor_sbaij_mumps(Mat,MatFactorType,Mat*);
1727 #endif
1728 #if defined(PETSC_HAVE_SPOOLES)
1729 extern PetscErrorCode  MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1730 #endif
1731 #if defined(PETSC_HAVE_PASTIX)
1732 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1733 #endif
1734 EXTERN_C_END
1735 
1736 /*MC
1737    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1738    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1739 
1740    Options Database Keys:
1741 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1742 
1743   Level: beginner
1744 
1745 .seealso: MatCreateMPISBAIJ
1746 M*/
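
/*
   Typical creation sequence (a sketch; comm, M, bs, d_nz and o_nz are
   application-defined):

      Mat A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);

   Alternatively call MatSetFromOptions(A) and run with -mat_type mpisbaij.
   Since only the upper triangle is stored, insertions into the lower triangle
   are dropped or flagged according to MAT_IGNORE_LOWER_TRIANGULAR.
*/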
1747 
1748 EXTERN_C_BEGIN
1749 #undef __FUNCT__
1750 #define __FUNCT__ "MatCreate_MPISBAIJ"
1751 PetscErrorCode  MatCreate_MPISBAIJ(Mat B)
1752 {
1753   Mat_MPISBAIJ   *b;
1754   PetscErrorCode ierr;
1755   PetscBool      flg;
1756 
1757   PetscFunctionBegin;
1758 
1759   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1760   B->data = (void*)b;
1761   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1762 
1763   B->ops->destroy    = MatDestroy_MPISBAIJ;
1764   B->ops->view       = MatView_MPISBAIJ;
1765   B->assembled       = PETSC_FALSE;
1766 
1767   B->insertmode = NOT_SET_VALUES;
1768   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1769   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1770 
1771   /* build local table of row and column ownerships */
1772   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1773 
1774   /* build cache for off array entries formed */
1775   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1776   b->donotstash  = PETSC_FALSE;
1777   b->colmap      = PETSC_NULL;
1778   b->garray      = PETSC_NULL;
1779   b->roworiented = PETSC_TRUE;
1780 
1781   /* stuff used in block assembly */
1782   b->barray       = 0;
1783 
1784   /* stuff used for matrix vector multiply */
1785   b->lvec         = 0;
1786   b->Mvctx        = 0;
1787   b->slvec0       = 0;
1788   b->slvec0b      = 0;
1789   b->slvec1       = 0;
1790   b->slvec1a      = 0;
1791   b->slvec1b      = 0;
1792   b->sMvctx       = 0;
1793 
1794   /* stuff for MatGetRow() */
1795   b->rowindices   = 0;
1796   b->rowvalues    = 0;
1797   b->getrowactive = PETSC_FALSE;
1798 
1799   /* hash table stuff */
1800   b->ht           = 0;
1801   b->hd           = 0;
1802   b->ht_size      = 0;
1803   b->ht_flag      = PETSC_FALSE;
1804   b->ht_fact      = 0;
1805   b->ht_total_ct  = 0;
1806   b->ht_insert_ct = 0;
1807 
1808   /* stuff for MatGetSubMatrices_MPIBAIJ_local() */
1809   b->ijonly       = PETSC_FALSE;
1810 
1811   b->in_loc       = 0;
1812   b->v_loc        = 0;
1813   b->n_loc        = 0;
1814   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1815     ierr = PetscOptionsBool("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1816     if (flg) {
1817       PetscReal fact = 1.39;
1818       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1819       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1820       if (fact <= 1.0) fact = 1.39;
1821       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1822       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1823     }
1824   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1825 
1826 #if defined(PETSC_HAVE_PASTIX)
1827   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_pastix_C",
1828 					   "MatGetFactor_mpisbaij_pastix",
1829 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1830 #endif
1831 #if defined(PETSC_HAVE_MUMPS)
1832   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mumps_C",
1833                                      "MatGetFactor_sbaij_mumps",
1834                                      MatGetFactor_sbaij_mumps);CHKERRQ(ierr);
1835 #endif
1836 #if defined(PETSC_HAVE_SPOOLES)
1837   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_spooles_C",
1838                                      "MatGetFactor_mpisbaij_spooles",
1839                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1840 #endif
1841   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1842                                      "MatStoreValues_MPISBAIJ",
1843                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1844   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1845                                      "MatRetrieveValues_MPISBAIJ",
1846                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1847   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1848                                      "MatGetDiagonalBlock_MPISBAIJ",
1849                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1850   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1851                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1852                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1853   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocationCSR_C",
1854                                      "MatMPISBAIJSetPreallocationCSR_MPISBAIJ",
1855                                      MatMPISBAIJSetPreallocationCSR_MPISBAIJ);CHKERRQ(ierr);
1856   B->symmetric                  = PETSC_TRUE;
1857   B->structurally_symmetric     = PETSC_TRUE;
1858   B->symmetric_set              = PETSC_TRUE;
1859   B->structurally_symmetric_set = PETSC_TRUE;
1860   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1861   PetscFunctionReturn(0);
1862 }
1863 EXTERN_C_END
1864 
1865 /*MC
1866    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1867 
1868    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1869    and MATMPISBAIJ otherwise.
1870 
1871    Options Database Keys:
1872 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1873 
1874   Level: beginner
1875 
1876 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1877 M*/
1878 
1879 EXTERN_C_BEGIN
1880 #undef __FUNCT__
1881 #define __FUNCT__ "MatCreate_SBAIJ"
1882 PetscErrorCode  MatCreate_SBAIJ(Mat A)
1883 {
1884   PetscErrorCode ierr;
1885   PetscMPIInt    size;
1886 
1887   PetscFunctionBegin;
1888   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1889   if (size == 1) {
1890     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1891   } else {
1892     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1893   }
1894   PetscFunctionReturn(0);
1895 }
1896 EXTERN_C_END
1897 
1898 #undef __FUNCT__
1899 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1900 /*@C
1901    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1902    the user should preallocate the matrix storage by setting the parameters
1903    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1904    performance can be increased by more than a factor of 50.
1905 
1906    Collective on Mat
1907 
1908    Input Parameters:
1909 +  A - the matrix
1910 .  bs   - size of block
1911 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1912            submatrix  (same for all local rows)
1913 .  d_nnz - array containing the number of block nonzeros in the various block rows
1914            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1915            (possibly different for each block row) or PETSC_NULL.  You must leave room
1916            for the diagonal entry even if it is zero.
1917 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1918            submatrix (same for all local rows).
1919 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1920            off-diagonal portion of the local submatrix (possibly different for
1921            each block row) or PETSC_NULL.
1922 
1923 
1924    Options Database Keys:
1925 +   -mat_no_unroll - uses code that does not unroll the loops in the
1926                      block calculations (much slower)
1927 -   -mat_block_size - size of the blocks to use
1928 
1929    Notes:
1930 
1931    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1932    then it must be used on all processors that share the object for that argument.
1933 
1934    If the *_nnz parameter is given then the *_nz parameter is ignored
1935 
1936    Storage Information:
1937    For a square global matrix we define each processor's diagonal portion
1938    to be its local rows and the corresponding columns (a square submatrix);
1939    each processor's off-diagonal portion encompasses the remainder of the
1940    local matrix (a rectangular submatrix).
1941 
1942    The user can specify preallocated storage for the diagonal part of
1943    the local submatrix with either d_nz or d_nnz (not both).  Set
1944    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1945    memory allocation.  Likewise, specify preallocated storage for the
1946    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1947 
1948    You can call MatGetInfo() to get information on how effective the preallocation was;
1949    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1950    You can also run with the option -info and look for messages with the string
1951    malloc in them to see if additional memory allocation was needed.
1952 
1953    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1954    the figure below we depict these three local rows and all columns (0-11).
1955 
1956 .vb
1957            0 1 2 3 4 5 6 7 8 9 10 11
1958           -------------------
1959    row 3  |  o o o d d d o o o o o o
1960    row 4  |  o o o d d d o o o o o o
1961    row 5  |  o o o d d d o o o o o o
1962           -------------------
1963 .ve
1964 
1965    Thus, any entries in the d locations are stored in the d (diagonal)
1966    submatrix, and any entries in the o locations are stored in the
1967    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1968    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1969 
1970    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1971    plus the diagonal part of the d matrix,
1972    and o_nz should indicate the number of block nonzeros per row in the upper triangular
1973    part of the o matrix.
1974    In general, for PDE problems in which most nonzeros are near the diagonal,
1975    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1976    or you will get TERRIBLE performance; see the users' manual chapter on
1977    matrices.
1978 
1979    Level: intermediate
1980 
1981 .keywords: matrix, block, aij, compressed row, sparse, parallel
1982 
1983 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1984 @*/
1985 PetscErrorCode  MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1986 {
1987   PetscErrorCode ierr;
1988 
1989   PetscFunctionBegin;
1990   ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocation_C",(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]),(B,bs,d_nz,d_nnz,o_nz,o_nnz));CHKERRQ(ierr);
1991   PetscFunctionReturn(0);
1992 }
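
/*
   Preallocation sketch with per-row counts (hypothetical sizes): for a matrix
   with block size 2 and two local block rows, where block row 0 has two
   upper-triangular blocks in the diagonal portion and one in the off-diagonal
   portion, and block row 1 has one and zero respectively:

      PetscInt d_nnz[2] = {2,1};
      PetscInt o_nnz[2] = {1,0};
      ierr = MatMPISBAIJSetPreallocation(A,2,0,d_nnz,0,o_nnz);CHKERRQ(ierr);

   When the array arguments are given, the scalar d_nz/o_nz values are ignored.
*/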
1993 
1994 #undef __FUNCT__
1995 #define __FUNCT__ "MatCreateMPISBAIJ"
1996 /*@C
1997    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1998    (block compressed row).  For good matrix assembly performance
1999    the user should preallocate the matrix storage by setting the parameters
2000    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
2001    performance can be increased by more than a factor of 50.
2002 
2003    Collective on MPI_Comm
2004 
2005    Input Parameters:
2006 +  comm - MPI communicator
2007 .  bs   - size of block
2008 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
2009            This value should be the same as the local size used in creating the
2010            y vector for the matrix-vector product y = Ax.
2011 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
2012            This value should be the same as the local size used in creating the
2013            x vector for the matrix-vector product y = Ax.
2014 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
2015 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
2016 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
2017            submatrix  (same for all local rows)
2018 .  d_nnz - array containing the number of block nonzeros in the various block rows
2019            in the upper triangular portion of the diagonal part of the local submatrix
2020            (possibly different for each block row) or PETSC_NULL.
2021            You must leave room for the diagonal entry even if it is zero.
2022 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
2023            submatrix (same for all local rows).
2024 -  o_nnz - array containing the number of nonzeros in the various block rows of the
2025            off-diagonal portion of the local submatrix (possibly different for
2026            each block row) or PETSC_NULL.
2027 
2028    Output Parameter:
2029 .  A - the matrix
2030 
2031    Options Database Keys:
2032 +   -mat_no_unroll - uses code that does not unroll the loops in the
2033                      block calculations (much slower)
2034 .   -mat_block_size - size of the blocks to use
2035 -   -mat_mpi - use the parallel matrix data structures even on one processor
2036                (defaults to using SeqSBAIJ format on one processor)
2037 
2038    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
2039    MatXXXXSetPreallocation() paradigm instead of this routine directly.
2040    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
2041 
2042    Notes:
2043    The number of rows and columns must be divisible by blocksize.
2044    This matrix type does not support complex Hermitian operations.
2045 
2046    The user MUST specify either the local or global matrix dimensions
2047    (possibly both).
2048 
2049    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
2050    then it must be used on all processors that share the object for that argument.
2051 
2052    If the *_nnz parameter is given then the *_nz parameter is ignored
2053 
2054    Storage Information:
2055    For a square global matrix we define each processor's diagonal portion
2056    to be its local rows and the corresponding columns (a square submatrix);
2057    each processor's off-diagonal portion encompasses the remainder of the
2058    local matrix (a rectangular submatrix).
2059 
2060    The user can specify preallocated storage for the diagonal part of
2061    the local submatrix with either d_nz or d_nnz (not both).  Set
2062    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
2063    memory allocation.  Likewise, specify preallocated storage for the
2064    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
2065 
2066    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
2067    the figure below we depict these three local rows and all columns (0-11).
2068 
2069 .vb
2070            0 1 2 3 4 5 6 7 8 9 10 11
2071           -------------------
2072    row 3  |  o o o d d d o o o o o o
2073    row 4  |  o o o d d d o o o o o o
2074    row 5  |  o o o d d d o o o o o o
2075           -------------------
2076 .ve
2077 
2078    Thus, any entries in the d locations are stored in the d (diagonal)
2079    submatrix, and any entries in the o locations are stored in the
2080    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2081    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2082 
2083    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2084    plus the diagonal part of the d matrix,
2085    and o_nz should indicate the number of block nonzeros per row in the upper triangular part of the o matrix.
2086    In general, for PDE problems in which most nonzeros are near the diagonal,
2087    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2088    or you will get TERRIBLE performance; see the users' manual chapter on
2089    matrices.
2090 
2091    Level: intermediate
2092 
2093 .keywords: matrix, block, aij, compressed row, sparse, parallel
2094 
2095 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2096 @*/
2097 
2098 PetscErrorCode  MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2099 {
2100   PetscErrorCode ierr;
2101   PetscMPIInt    size;
2102 
2103   PetscFunctionBegin;
2104   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2105   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2106   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2107   if (size > 1) {
2108     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2109     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2110   } else {
2111     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2112     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2113   }
2114   PetscFunctionReturn(0);
2115 }
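
/*
   Creation sketch using the routine above (bs, M, d_nz and o_nz are
   application-defined; PETSC_DECIDE lets PETSc pick the local sizes):

      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,M,
                               d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
*/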
2116 
2117 
2118 #undef __FUNCT__
2119 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2120 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2121 {
2122   Mat            mat;
2123   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2124   PetscErrorCode ierr;
2125   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
2126   PetscScalar    *array;
2127 
2128   PetscFunctionBegin;
2129   *newmat       = 0;
2130   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2131   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
2132   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2133   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2134   ierr = PetscLayoutCopy(matin->rmap,&mat->rmap);CHKERRQ(ierr);
2135   ierr = PetscLayoutCopy(matin->cmap,&mat->cmap);CHKERRQ(ierr);
2136 
2137   mat->factortype   = matin->factortype;
2138   mat->preallocated = PETSC_TRUE;
2139   mat->assembled    = PETSC_TRUE;
2140   mat->insertmode   = NOT_SET_VALUES;
2141 
2142   a = (Mat_MPISBAIJ*)mat->data;
2143   a->bs2   = oldmat->bs2;
2144   a->mbs   = oldmat->mbs;
2145   a->nbs   = oldmat->nbs;
2146   a->Mbs   = oldmat->Mbs;
2147   a->Nbs   = oldmat->Nbs;
2148 
2149 
2150   a->size         = oldmat->size;
2151   a->rank         = oldmat->rank;
2152   a->donotstash   = oldmat->donotstash;
2153   a->roworiented  = oldmat->roworiented;
2154   a->rowindices   = 0;
2155   a->rowvalues    = 0;
2156   a->getrowactive = PETSC_FALSE;
2157   a->barray       = 0;
2158   a->rstartbs    = oldmat->rstartbs;
2159   a->rendbs      = oldmat->rendbs;
2160   a->cstartbs    = oldmat->cstartbs;
2161   a->cendbs      = oldmat->cendbs;
2162 
2163   /* hash table stuff */
2164   a->ht           = 0;
2165   a->hd           = 0;
2166   a->ht_size      = 0;
2167   a->ht_flag      = oldmat->ht_flag;
2168   a->ht_fact      = oldmat->ht_fact;
2169   a->ht_total_ct  = 0;
2170   a->ht_insert_ct = 0;
2171 
2172   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2173   if (oldmat->colmap) {
2174 #if defined (PETSC_USE_CTABLE)
2175     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2176 #else
2177     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2178     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2179     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2180 #endif
2181   } else a->colmap = 0;
2182 
2183   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2184     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2185     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2186     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2187   } else a->garray = 0;
2188 
2189   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2190   ierr = VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2191   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2192   ierr = VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2193   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2194 
2195   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2196   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2197   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2198   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2199 
2200   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2201   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2202   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2203   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2204   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2205   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2206   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2207   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2208   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2209   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2210   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2211   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2212   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2213 
2214   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2215   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2216   a->sMvctx = oldmat->sMvctx;
2217   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2218 
2219   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2220   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2221   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2222   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2223   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2224   *newmat = mat;
2225   PetscFunctionReturn(0);
2226 }
2227 
2228 #undef __FUNCT__
2229 #define __FUNCT__ "MatLoad_MPISBAIJ"
2230 PetscErrorCode MatLoad_MPISBAIJ(Mat newmat,PetscViewer viewer)
2231 {
2232   PetscErrorCode ierr;
2233   PetscInt       i,nz,j,rstart,rend;
2234   PetscScalar    *vals,*buf;
2235   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2236   MPI_Status     status;
2237   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2238   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2239   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2240   PetscInt       bs=1,Mbs,mbs,extra_rows;
2241   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2242   PetscInt       dcount,kmax,k,nzcount,tmp,sizesset=1,grows,gcols;
2243   int            fd;
2244 
2245   PetscFunctionBegin;
2246   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2247     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2248   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2249 
2250   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2251   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2252   if (!rank) {
2253     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2254     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2255     if (header[0] != MAT_FILE_CLASSID) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2256     if (header[3] < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2257   }
2258 
2259   if (newmat->rmap->n < 0 && newmat->rmap->N < 0 && newmat->cmap->n < 0 && newmat->cmap->N < 0) sizesset = 0;
2260 
2261   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2262   M = header[1]; N = header[2];
2263 
2264   /* If global rows/cols are set to PETSC_DECIDE, set it to the sizes given in the file */
2265   if (sizesset && newmat->rmap->N < 0) newmat->rmap->N = M;
2266   if (sizesset && newmat->cmap->N < 0) newmat->cmap->N = N;
2267 
2268   /* If global sizes are set, check if they are consistent with that given in the file */
2269   if (sizesset) {
2270     ierr = MatGetSize(newmat,&grows,&gcols);CHKERRQ(ierr);
2271   }
2272   if (sizesset && newmat->rmap->N != grows) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of rows: Matrix in file has (%d) and input matrix has (%d)",M,grows);
2273   if (sizesset && newmat->cmap->N != gcols) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED, "Inconsistent # of cols: Matrix in file has (%d) and input matrix has (%d)",N,gcols);
2274 
2275   if (M != N) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Can only do square matrices");
2276 
2277   /*
2278      This code adds extra rows to make sure the number of rows is
2279      divisible by the blocksize
2280   */
2281   Mbs        = M/bs;
2282   extra_rows = bs - M + bs*(Mbs);
2283   if (extra_rows == bs) extra_rows = 0;
2284   else                  Mbs++;
2285   if (extra_rows && !rank) {
2286     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2287   }
2288 
2289   /* determine ownership of all rows */
2290   if (newmat->rmap->n < 0) { /* PETSC_DECIDE */
2291     mbs        = Mbs/size + ((Mbs % size) > rank);
2292     m          = mbs*bs;
2293   } else { /* User Set */
2294     m          = newmat->rmap->n;
2295     mbs        = m/bs;
2296   }
2297   ierr       = PetscMalloc2(size+1,PetscMPIInt,&rowners,size+1,PetscMPIInt,&browners);CHKERRQ(ierr);
2298   mmbs       = PetscMPIIntCast(mbs);
2299   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2300   rowners[0] = 0;
2301   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2302   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2303   rstart = rowners[rank];
2304   rend   = rowners[rank+1];
2305 
2306   /* distribute row lengths to all processors */
2307   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2308   if (!rank) {
2309     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2310     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2311     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2312     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2313     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2314     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2315     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2316   } else {
2317     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2318   }
2319 
2320   if (!rank) {   /* procs[0] */
2321     /* calculate the number of nonzeros on each processor */
2322     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2323     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2324     for (i=0; i<size; i++) {
2325       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2326         procsnz[i] += rowlengths[j];
2327       }
2328     }
2329     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2330 
2331     /* determine max buffer needed and allocate it */
2332     maxnz = 0;
2333     for (i=0; i<size; i++) {
2334       maxnz = PetscMax(maxnz,procsnz[i]);
2335     }
2336     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2337 
2338     /* read in my part of the matrix column indices  */
2339     nz     = procsnz[0];
2340     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2341     mycols = ibuf;
2342     if (size == 1)  nz -= extra_rows;
2343     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2344     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2345 
2346     /* read in every ones (except the last) and ship off */
2347     for (i=1; i<size-1; i++) {
2348       nz   = procsnz[i];
2349       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2350       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2351     }
2352     /* read in the stuff for the last proc */
2353     if (size != 1) {
2354       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2355       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2356       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2357       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2358     }
2359     ierr = PetscFree(cols);CHKERRQ(ierr);
2360   } else {  /* procs[i], i>0 */
2361     /* determine buffer space needed for message */
2362     nz = 0;
2363     for (i=0; i<m; i++) {
2364       nz += locrowlens[i];
2365     }
2366     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2367     mycols = ibuf;
2368     /* receive message of column indices*/
2369     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2370     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2371     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2372   }
2373 
2374   /* loop over local rows, determining number of off diagonal entries */
2375   ierr     = PetscMalloc2(rend-rstart,PetscInt,&dlens,rend-rstart,PetscInt,&odlens);CHKERRQ(ierr);
2376   ierr     = PetscMalloc3(Mbs,PetscInt,&mask,Mbs,PetscInt,&masked1,Mbs,PetscInt,&masked2);CHKERRQ(ierr);
2377   ierr     = PetscMemzero(mask,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2378   ierr     = PetscMemzero(masked1,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2379   ierr     = PetscMemzero(masked2,Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2380   rowcount = 0;
2381   nzcount  = 0;
2382   for (i=0; i<mbs; i++) {
2383     dcount  = 0;
2384     odcount = 0;
2385     for (j=0; j<bs; j++) {
2386       kmax = locrowlens[rowcount];
2387       for (k=0; k<kmax; k++) {
2388         tmp = mycols[nzcount++]/bs; /* block col. index */
2389         if (!mask[tmp]) {
2390           mask[tmp] = 1;
2391           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2392           else masked1[dcount++] = tmp; /* entry in diag portion */
2393         }
2394       }
2395       rowcount++;
2396     }
2397 
2398     dlens[i]  = dcount;  /* d_nzz[i] */
2399     odlens[i] = odcount; /* o_nzz[i] */
2400 
2401     /* zero out the mask elements we set */
2402     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2403     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2404   }
2405   if (!sizesset) {
2406     ierr = MatSetSizes(newmat,m,m,M+extra_rows,N+extra_rows);CHKERRQ(ierr);
2407   }
2408   ierr = MatSetOption(newmat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
2409   ierr = MatMPISBAIJSetPreallocation(newmat,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2410 
2411   if (!rank) {
2412     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2413     /* read in my part of the matrix numerical values  */
2414     nz = procsnz[0];
2415     vals = buf;
2416     mycols = ibuf;
2417     if (size == 1)  nz -= extra_rows;
2418     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2419     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2420 
2421     /* insert into matrix */
2422     jj      = rstart*bs;
2423     for (i=0; i<m; i++) {
2424       ierr = MatSetValues(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2425       mycols += locrowlens[i];
2426       vals   += locrowlens[i];
2427       jj++;
2428     }
2429 
2430     /* read in other processors (except the last one) and ship out */
2431     for (i=1; i<size-1; i++) {
2432       nz   = procsnz[i];
2433       vals = buf;
2434       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2435       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2436     }
2437     /* the last proc */
2438     if (size != 1){
2439       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2440       vals = buf;
2441       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2442       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2443       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)newmat)->tag,comm);CHKERRQ(ierr);
2444     }
2445     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2446 
2447   } else {
2448     /* receive numeric values */
2449     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2450 
2451     /* receive message of values*/
2452     vals   = buf;
2453     mycols = ibuf;
2454     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)newmat)->tag,comm,&status);CHKERRQ(ierr);
2455     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2456     if (maxnz != nz) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2457 
2458     /* insert into matrix */
2459     jj      = rstart*bs;
2460     for (i=0; i<m; i++) {
2461       ierr    = MatSetValues_MPISBAIJ(newmat,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2462       mycols += locrowlens[i];
2463       vals   += locrowlens[i];
2464       jj++;
2465     }
2466   }
2467 
2468   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2469   ierr = PetscFree(buf);CHKERRQ(ierr);
2470   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2471   ierr = PetscFree2(rowners,browners);CHKERRQ(ierr);
2472   ierr = PetscFree2(dlens,odlens);CHKERRQ(ierr);
2473   ierr = PetscFree3(mask,masked1,masked2);CHKERRQ(ierr);
2474   ierr = MatAssemblyBegin(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2475   ierr = MatAssemblyEnd(newmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2476   PetscFunctionReturn(0);
2477 }
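
/*
   Loading sketch ("matrix.dat" is a placeholder file name): the file must
   contain a square matrix in PETSc binary format; rows of the identity are
   appended when the global size is not divisible by the chosen block size.

      Mat         A;
      PetscViewer fd;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&fd);CHKERRQ(ierr);
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatLoad(A,fd);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(&fd);CHKERRQ(ierr);

   The block size can be chosen at load time with -matload_block_size <bs>.
*/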
2478 
2479 #undef __FUNCT__
2480 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2481 /*XXXXX@
2482    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2483 
2484    Input Parameters:
2485 .  mat  - the matrix
2486 .  fact - factor
2487 
2488    Not Collective on Mat, each process can have a different hash factor
2489 
2490    Level: advanced
2491 
2492   Notes:
2493    This can also be set by the command line option: -mat_use_hash_table fact
2494 
2495 .keywords: matrix, hashtable, factor, HT
2496 
2497 .seealso: MatSetOption()
2498 @XXXXX*/
2499 
2500 
2501 #undef __FUNCT__
2502 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2503 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2504 {
2505   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2506   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2507   PetscReal      atmp;
2508   PetscReal      *work,*svalues,*rvalues;
2509   PetscErrorCode ierr;
2510   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2511   PetscMPIInt    rank,size;
2512   PetscInt       *rowners_bs,dest,count,source;
2513   PetscScalar    *va;
2514   MatScalar      *ba;
2515   MPI_Status     stat;
2516 
2517   PetscFunctionBegin;
2518   if (idx) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2519   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2520   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2521 
2522   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2523   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2524 
2525   bs   = A->rmap->bs;
2526   mbs  = a->mbs;
2527   Mbs  = a->Mbs;
2528   ba   = b->a;
2529   bi   = b->i;
2530   bj   = b->j;
2531 
2532   /* find ownerships */
2533   rowners_bs = A->rmap->range;
2534 
2535   /* each proc creates an array to be distributed */
2536   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2537   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2538 
2539   /* row_max for B */
2540   if (rank != size-1){
2541     for (i=0; i<mbs; i++) {
2542       ncols = bi[1] - bi[0]; bi++;
2543       brow  = bs*i;
2544       for (j=0; j<ncols; j++){
2545         bcol = bs*(*bj);
2546         for (kcol=0; kcol<bs; kcol++){
2547           col = bcol + kcol;                 /* local col index */
2548           col += rowners_bs[rank+1];      /* global col index */
2549           for (krow=0; krow<bs; krow++){
2550             atmp = PetscAbsScalar(*ba); ba++;
2551             row = brow + krow;    /* local row index */
2552             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2553             if (work[col] < atmp) work[col] = atmp;
2554           }
2555         }
2556         bj++;
2557       }
2558     }
2559 
2560     /* send values to its owners */
2561     for (dest=rank+1; dest<size; dest++){
2562       svalues = work + rowners_bs[dest];
2563       count   = rowners_bs[dest+1]-rowners_bs[dest];
2564       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2565     }
2566   }
2567 
2568   /* receive values */
2569   if (rank){
2570     rvalues = work;
2571     count   = rowners_bs[rank+1]-rowners_bs[rank];
2572     for (source=0; source<rank; source++){
2573       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2574       /* process values */
2575       for (i=0; i<count; i++){
2576         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2577       }
2578     }
2579   }
2580 
2581   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2582   ierr = PetscFree(work);CHKERRQ(ierr);
2583   PetscFunctionReturn(0);
2584 }
2585 
2586 #undef __FUNCT__
2587 #define __FUNCT__ "MatSOR_MPISBAIJ"
2588 PetscErrorCode MatSOR_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2589 {
2590   Mat_MPISBAIJ      *mat = (Mat_MPISBAIJ*)matin->data;
2591   PetscErrorCode    ierr;
2592   PetscInt          mbs=mat->mbs,bs=matin->rmap->bs;
2593   PetscScalar       *x,*ptr,*from;
2594   Vec               bb1;
2595   const PetscScalar *b;
2596 
2597   PetscFunctionBegin;
2598   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2599   if (bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2600 
2601   if (flag == SOR_APPLY_UPPER) {
2602     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2603     PetscFunctionReturn(0);
2604   }
2605 
2606   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2607     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2608       ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2609       its--;
2610     }
2611 
2612     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2613     while (its--){
2614 
2615       /* lower triangular part: slvec0b = - B^T*xx */
2616       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2617 
2618       /* copy xx into slvec0a */
2619       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2620       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2621       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2622       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2623 
2624       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2625 
2626       /* copy bb into slvec1a */
2627       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2628       ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
2629       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2630       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2631 
2632       /* set slvec1b = 0 */
2633       ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
2634 
2635       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2636       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2637       ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
2638       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2639 
2640       /* upper triangular part: bb1 = bb1 - B*x */
2641       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2642 
2643       /* local diagonal sweep */
2644       ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2645     }
2646     ierr = VecDestroy(&bb1);CHKERRQ(ierr);
2647   } else if ((flag & SOR_LOCAL_FORWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2648     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2649   } else if ((flag & SOR_LOCAL_BACKWARD_SWEEP) && (its == 1) && (flag & SOR_ZERO_INITIAL_GUESS)){
2650     ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,1,xx);CHKERRQ(ierr);
2651   } else if (flag & SOR_EISENSTAT) {
    Vec               xx1;
    PetscBool         hasop;
    const PetscScalar *diag;
    PetscScalar       *sl,scale = (omega - 2.0)/omega;
    PetscInt          i,n;

    if (!mat->xx1) {
      ierr = VecDuplicate(bb,&mat->xx1);CHKERRQ(ierr);
      ierr = VecDuplicate(bb,&mat->bb1);CHKERRQ(ierr);
    }
    xx1 = mat->xx1;
    bb1 = mat->bb1;

    ierr = (*mat->A->ops->sor)(mat->A,bb,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_BACKWARD_SWEEP),fshift,lits,1,xx);CHKERRQ(ierr);

    if (!mat->diag) {
      /* this is wrong if the same matrix is given new values: the cached diagonal is never refreshed */
      ierr = MatGetVecs(matin,&mat->diag,PETSC_NULL);CHKERRQ(ierr);
      ierr = MatGetDiagonal(matin,mat->diag);CHKERRQ(ierr);
    }
    ierr = MatHasOperation(matin,MATOP_MULT_DIAGONAL_BLOCK,&hasop);CHKERRQ(ierr);

    if (hasop) {
      ierr = MatMultDiagonalBlock(matin,xx,bb1);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
    } else {
      /*
         The hand-written loop below replaces these two calls; it may be a bit
         faster with a good compiler:
      ierr = VecPointwiseMult(mat->slvec1a,mat->diag,xx);CHKERRQ(ierr);
      ierr = VecAYPX(mat->slvec1a,scale,bb);CHKERRQ(ierr);
      */
      ierr = VecGetArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecGetArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecGetArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = VecGetLocalSize(xx,&n);CHKERRQ(ierr);
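      /* pointwise: sl[i] = b[i] + ((omega-2)/omega)*diag[i]*x[i]; for
         omega == 1 the scale is -1, so the first branch skips the multiply by scale */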
      if (omega == 1.0) {
        for (i=0; i<n; i++) {
          sl[i] = b[i] - diag[i]*x[i];
        }
        ierr = PetscLogFlops(2.0*n);CHKERRQ(ierr);
      } else {
        for (i=0; i<n; i++) {
          sl[i] = b[i] + scale*diag[i]*x[i];
        }
        ierr = PetscLogFlops(3.0*n);CHKERRQ(ierr);
      }
      ierr = VecRestoreArray(mat->slvec1a,&sl);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(mat->diag,&diag);CHKERRQ(ierr);
      ierr = VecRestoreArrayRead(bb,&b);CHKERRQ(ierr);
      ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    }

    /* multiply off-diagonal portion of matrix */
    ierr = VecSet(mat->slvec1b,0.0);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
    ierr = VecGetArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
    ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
    ierr = VecRestoreArray(mat->slvec0,&from);CHKERRQ(ierr);
    ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
    ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
    ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,mat->slvec1a);CHKERRQ(ierr);

    /* local sweep */
    ierr = (*mat->A->ops->sor)(mat->A,mat->slvec1a,omega,(MatSORType)(SOR_ZERO_INITIAL_GUESS | SOR_LOCAL_FORWARD_SWEEP),fshift,lits,1,xx1);CHKERRQ(ierr);
    ierr = VecAXPY(xx,1.0,xx1);CHKERRQ(ierr);
  } else {
    SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  }
  PetscFunctionReturn(0);
}
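
/*
   A minimal sketch of how this kernel is reached: it is installed as the SOR
   operation of MATMPISBAIJ, so it is invoked through MatSOR() (for example
   from PCSOR).  Assuming A is an assembled MATMPISBAIJ matrix with bs = 1 and
   b, x are conforming vectors:

     ierr = MatSOR(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),0.0,2,1,x);CHKERRQ(ierr);
*/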

#undef __FUNCT__
#define __FUNCT__ "MatSOR_MPISBAIJ_2comm"
PetscErrorCode MatSOR_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscErrorCode ierr;
  Vec            lvec1,bb1;

  PetscFunctionBegin;
  if (its <= 0 || lits <= 0) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
  if (matin->rmap->bs > 1) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");

  if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP) {
    if (flag & SOR_ZERO_INITIAL_GUESS) {
      ierr = (*mat->A->ops->sor)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
      its--;
    }

    ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
    ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
    while (its--) {
      ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

      /* lower triangular part: bb1 = bb - B^T*xx */
      ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
      ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
      ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
      ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* upper triangular part: bb1 = bb1 - B*xx */
      ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
      ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);

      ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

      /* local diagonal sweep */
      ierr = (*mat->A->ops->sor)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
    }
    ierr = VecDestroy(&lvec1);CHKERRQ(ierr);
    ierr = VecDestroy(&bb1);CHKERRQ(ierr);
  } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJWithArrays"
/*@
     MatCreateMPISBAIJWithArrays - creates an MPI SBAIJ matrix using arrays that contain the local rows
         in standard CSR format.

   Collective on MPI_Comm

   Input Parameters:
+  comm - MPI communicator
.  bs - the block size, only a block size of 1 is supported
.  m - number of local rows (cannot be PETSC_DECIDE)
.  n - this value should be the same as the local size used in creating the
       x vector for the matrix-vector product y = Ax (or PETSC_DECIDE to have
       it calculated if N is given); for square matrices n is almost always m
.  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
.  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
.  i - row indices
.  j - column indices
-  a - matrix values

   Output Parameter:
.  mat - the matrix

   Level: intermediate

   Notes:
       The i, j, and a arrays ARE copied by this routine into the internal format used by PETSc;
     thus you CANNOT change the matrix entries by changing the values of a[] after you have
     called this routine. (MatCreateMPIAIJWithSplitArrays() avoids the copy, but creates an
     AIJ matrix rather than an SBAIJ one.)

       The i and j indices are 0 based, and the entries of i are offsets into the local j (and a) arrays.

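   Example:
       A sketch for bs = 1 on one process, passing only the diagonal and upper
     triangular entries of the symmetric matrix [2 1; 1 3]:
.vb
   PetscInt    i[] = {0,2,3},j[] = {0,1,1};
   PetscScalar a[] = {2.0,1.0,3.0};
   Mat         A;
   ierr = MatCreateMPISBAIJWithArrays(PETSC_COMM_WORLD,1,2,2,PETSC_DETERMINE,PETSC_DETERMINE,i,j,a,&A);CHKERRQ(ierr);
.ve
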
2806 
2807 .seealso: MatCreate(), MatCreateSeqAIJ(), MatSetValues(), MatMPIAIJSetPreallocation(), MatMPIAIJSetPreallocationCSR(),
2808           MPIAIJ, MatCreateMPIAIJ(), MatCreateMPIAIJWithSplitArrays()
2809 @*/
PetscErrorCode  MatCreateMPISBAIJWithArrays(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,const PetscInt i[],const PetscInt j[],const PetscScalar a[],Mat *mat)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  if (i[0]) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"i (row indices) must start with 0");
  if (m < 0) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_ARG_OUTOFRANGE,"local number of rows (m) cannot be PETSC_DECIDE, or negative");
  ierr = MatCreate(comm,mat);CHKERRQ(ierr);
  ierr = MatSetSizes(*mat,m,n,M,N);CHKERRQ(ierr);
  ierr = MatSetType(*mat,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocationCSR(*mat,bs,i,j,a);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJSetPreallocationCSR"
/*@C
   MatMPISBAIJSetPreallocationCSR - Allocates memory for a sparse parallel matrix in SBAIJ format
   (block compressed row, with only the upper triangular portion stored).

   Collective on MPI_Comm

   Input Parameters:
+  B - the matrix
.  bs - the block size
.  i - the indices into j for the start of each local row (starts with zero)
.  j - the column indices for each local row (starts with zero); these must be sorted for each row
-  v - optional values in the matrix

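   Example:
       A sketch for bs = 1 on one process; B gets its type and local sizes
     first, and only the diagonal and upper triangular entries of the
     symmetric matrix [2 1; 1 3] are passed:
.vb
   PetscInt    i[] = {0,2,3},j[] = {0,1,1};
   PetscScalar v[] = {2.0,1.0,3.0};
   ierr = MatCreate(PETSC_COMM_WORLD,&B);CHKERRQ(ierr);
   ierr = MatSetSizes(B,2,2,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
   ierr = MatSetType(B,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocationCSR(B,1,i,j,v);CHKERRQ(ierr);
.ve
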
   Level: developer

.keywords: matrix, sbaij, compressed row, sparse, parallel

.seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatMPISBAIJSetPreallocation(), MatCreateMPISBAIJWithArrays(), MATMPISBAIJ
@*/
PetscErrorCode  MatMPISBAIJSetPreallocationCSR(Mat B,PetscInt bs,const PetscInt i[],const PetscInt j[],const PetscScalar v[])
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = PetscTryMethod(B,"MatMPISBAIJSetPreallocationCSR_C",(Mat,PetscInt,const PetscInt[],const PetscInt[],const PetscScalar[]),(B,bs,i,j,v));CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
