/* petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d4bf62d159b4eec940d9872c577cf78ea329f539) */
#define PETSCMAT_DLL

#include "../src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "../src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END


#define CHUNKSIZE  10

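/*
   The two macros below insert one scalar entry into either the local
   diagonal block (A, SeqSBAIJ format) or the local off-diagonal block
   (B, SeqBAIJ format).  Both proceed the same way: locate the block row,
   narrow the search with a short bisection over the sorted block-column
   indices rp[], then scan linearly.  If the block column already exists,
   the value is added or overwritten inside the bs x bs block (entries are
   stored column-major within a block, at offset bs*cidx + ridx); otherwise
   a new block is inserted by shifting the tail of the row, subject to the
   nonew policy (1: silently skip new locations, -1: raise an error).
   They are written as macros, presumably to avoid call overhead in the
   innermost MatSetValues loop while sharing the caller's local variables.
*/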
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto a_noinsert; \
      } \
    } \
    if (a->nonew == 1) goto a_noinsert; \
    if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    a_noinsert:; \
    ailen[brow] = nrow; \
}

#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
    bcol = col/bs; \
    ridx = row % bs; cidx = col % bs; \
    low = 0; high = nrow; \
    while (high-low > 3) { \
      t = (low+high)/2; \
      if (rp[t] > bcol) high = t; \
      else              low  = t; \
    } \
    for (_i=low; _i<high; _i++) { \
      if (rp[_i] > bcol) break; \
      if (rp[_i] == bcol) { \
        bap  = ap + bs2*_i + bs*cidx + ridx; \
        if (addv == ADD_VALUES) *bap += value;  \
        else                    *bap  = value;  \
        goto b_noinsert; \
      } \
    } \
    if (b->nonew == 1) goto b_noinsert; \
    if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
    MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
    N = nrow++ - 1;  \
    /* shift up all the later entries in this row */ \
    for (ii=N; ii>=_i; ii--) { \
      rp[ii+1] = rp[ii]; \
      ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
    } \
    if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
    rp[_i]                      = bcol;  \
    ap[bs2*_i + bs*cidx + ridx] = value;  \
    b_noinsert:; \
    bilen[brow] = nrow; \
}

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap->rstart;
  PetscInt       rend_orig=mat->rmap->rend,cstart_orig=mat->cmap->rstart;
  PetscInt       cend_orig=mat->cmap->rend,bs=mat->rmap->bs;

  /* Some variables required by the macros above */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc = PETSC_NULL;
  MatScalar      *v_loc = PETSC_NULL;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap->N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) {
          if (a->ignore_ltriangular) {
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig) {  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
              /* reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++) {
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}
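
/*
   Usage sketch (illustrative only, not part of this file): since the routine
   above drops or rejects lower triangular entries, a typical caller sets
   only entries with i <= j and relies on symmetry for the rest:

     Mat         mat;        - a MATMPISBAIJ with type, sizes and bs = 1 already set
     PetscInt    i = 1, j = 3;
     PetscScalar v = 4.2;

     MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);
     MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);  - logically also sets (3,1)
     MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);
     MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);
*/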

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* if the block is the only one in its row (or column) it is already contiguous and no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -= bs2;
        }

        if (in[j] >= cstart && in[j] < cend) {
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col  = in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
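
/*
   Packing note for the routine above: v holds m x n blocks, each bs x bs.
   In row-oriented input the bs scalars of one block row are contiguous, and
   the pointer must then skip stepval = (n-1)*bs scalars to reach the next
   row of the same block; the copy loop therefore gathers one block into the
   contiguous buffer barray that the Seq kernels expect.  The copy is
   skipped when a block is the only one in its row (n == 1, row-oriented) or
   column (m == 1, column-oriented), since it is then already contiguous in v.
*/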

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap->bs,i,j,bsrstart = mat->rmap->rstart,bsrend = mat->rmap->rend;
  PetscInt       bscstart = mat->cmap->rstart,bscend = mat->cmap->rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap->N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend) {
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}
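
/*
   Note on the colmap lookups above: colmap is keyed by global block column
   and stores 1 + bs*(local block column in B), so that a stored value of 0
   doubles as the "not present on this process" flag in both the PetscTable
   and the plain-array variants; the caller subtracts 1 and adds the
   within-block offset to obtain a point column index into B.
*/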

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr = MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr = MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr = MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap->bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->cmap->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->cmap->N;
      ierr  = PetscMemzero(rsum,mat->cmap->N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++) {
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++) {
            for (row=0; row<bs; row++) {
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap->N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap->N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
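
/*
   Frobenius norm above: only the upper triangle is stored globally, so with
   A_p the local diagonal block on process p (whose SeqSBAIJ norm is assumed
   to account for its own symmetry) and B_p the stored part to the right of
   the diagonal block,

      norm(M)^2 = sum_p ( norm(A_p)^2 + 2*norm(B_p)^2 )

   which is why the reduction computes sqrt(sum[0] + 2*sum[1]).
*/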

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted, others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap->range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col;
  PetscTruth     other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily switch to column-oriented mode and restore the original
       flags after MatSetValues() */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled; if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, and we can skip this step
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* set up Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}
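
/*
   Assembly flow above: scalar stash entries are replayed through
   MatSetValues_MPISBAIJ and block stash entries through
   MatSetValuesBlocked_MPISBAIJ as the scatter messages arrive.  The
   MPI_Allreduce of was_assembled with MPI_PROD is an agreement test: the
   product is true only if every process was already assembled, so if any
   process disassembled (an off-process entry forced DisAssemble_MPISBAIJ)
   all other processes disassemble as well before reassembling.
*/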

extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap->N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap->N,N = mat->cmap->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap->N,mat->cmap->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx) {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
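
/*
   The multiply above computes y = A_d*x + B*x_ghost + B^T*x with a single
   communication round: slvec0 packs [x_local ; B^T*x_local] and the forward
   ADD_VALUES scatter on sMvctx simultaneously delivers ghost values of x
   into slvec1b and accumulates the B^T contributions into the owners'
   slvec1a (which already holds A_d*x), so one scatter covers both the upper
   triangular part and its implicit lower triangular mirror.
*/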

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap->n) { /* compare against the local row count, not the global one */
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
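
/*
   The _2comm variant above uses two communication rounds on Mvctx instead:
   a forward scatter brings in ghost values of x for the superdiagonal
   product B*x_ghost, and a reverse ADD_VALUES scatter returns the
   subdiagonal contributions B^T*x to their owners.  MatMult_MPISBAIJ merges
   these two rounds into the single sMvctx scatter.
*/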

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
  diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap->N != a->cmap->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap->rstart,brend = matin->rmap->rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap = mat->garray;
  if (v || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart) v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
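
/*
   MatGetRow above merges the A (diagonal-block) and B (off-diagonal) pieces
   of the requested row into one column-sorted list: B entries whose global
   block column lies before the diagonal range come first (up to imark),
   then all A entries, then the remaining B entries.  Column indices from B
   are translated to global numbering through cmap (garray).
*/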

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size = (PetscReal)matin->rmap->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NEW_NONZERO_LOCATIONS:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_UNUSED_NONZERO_LOCATION_ERR:
  case MAT_KEEP_NONZERO_PATTERN:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = flg;
    ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
    break;
  case MAT_NEW_DIAGONALS:
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = flg;
    break;
  case MAT_USE_HASH_TABLE:
    a->ht_flag = flg;
    break;
  case MAT_HERMITIAN:
    if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
    /* fall through to the symmetric options below */
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_SYMMETRY_ETERNAL:
    if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
    ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_ERROR_LOWER_TRIANGULAR:
    aA->ignore_ltriangular = flg;
    break;
  case MAT_GETROW_UPPERTRIANGULAR:
    aA->getrow_utriangular = flg;
    break;
  default:
    SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* the matrix is symmetric, so its transpose is a copy of itself */
  if (reuse == MAT_INITIAL_MATRIX || *B != A) {
    ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a=baij->A, b=baij->B;
  PetscErrorCode ierr;
  PetscInt       nv,m,n;
  PetscTruth     flg;

  PetscFunctionBegin;
  if (ll != rr) {
    ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
    if (!flg) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
  }
  if (!ll) PetscFunctionReturn(0);

  ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
  if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);

  ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
  if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");

  ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* left diagonalscale the off-diagonal part */
  ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

  /* scale the diagonal part */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* right diagonalscale the off-diagonal part */
  ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
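
/*
   Only symmetric scaling is possible above: M -> L*M*R with L != R would
   destroy the symmetry the SBAIJ format relies on, hence the requirement
   ll == rr.  The off-diagonal block B is scaled by ll on the left and, once
   the scatter completes, by the ghost portion of rr on the right.
*/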

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatCopy_MPISBAIJ"
PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;

  PetscFunctionBegin;
  /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
  if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
    ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
    ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  } else {
    ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
    ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,-PetscMax(A->rmap->bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#include "petscblaslapack.h"
#undef __FUNCT__
#define __FUNCT__ "MatAXPY_MPISBAIJ"
PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
{
  PetscErrorCode ierr;
  Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
  PetscBLASInt   bnz,one=1;
  Mat_SeqSBAIJ   *xa,*ya;
  Mat_SeqBAIJ    *xb,*yb;

  PetscFunctionBegin;
  if (str == SAME_NONZERO_PATTERN) {
    PetscScalar alpha = a;
    PetscInt    bs2 = Y->rmap->bs*Y->rmap->bs;
    xa = (Mat_SeqSBAIJ *)xx->A->data;
    ya = (Mat_SeqSBAIJ *)yy->A->data;
    bnz = PetscBLASIntCast(xa->nz*bs2); /* nz counts blocks; each block holds bs2 scalars */
    BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
    xb = (Mat_SeqBAIJ *)xx->B->data;
    yb = (Mat_SeqBAIJ *)yy->B->data;
    bnz = PetscBLASIntCast(xb->nz*bs2);
    BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
  } else {
    ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
    ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
    ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
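
/*
   When X and Y share a nonzero pattern, MatAXPY above reduces to two flat
   BLAS axpy calls over the raw value arrays of the A and B parts (nz blocks
   of bs2 scalars each).  Otherwise it falls back to MatAXPY_Basic,
   bracketed by MatGetRowUpperTriangular() so that row access on the
   symmetric matrix X is permitted.
*/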

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}


1378 /* -------------------------------------------------------------------*/
1379 static struct _MatOps MatOps_Values = {
1380        MatSetValues_MPISBAIJ,
1381        MatGetRow_MPISBAIJ,
1382        MatRestoreRow_MPISBAIJ,
1383        MatMult_MPISBAIJ,
1384 /* 4*/ MatMultAdd_MPISBAIJ,
1385        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones: the matrix is symmetric */
1386        MatMultAdd_MPISBAIJ,
1387        0,
1388        0,
1389        0,
1390 /*10*/ 0,
1391        0,
1392        0,
1393        MatRelax_MPISBAIJ,
1394        MatTranspose_MPISBAIJ,
1395 /*15*/ MatGetInfo_MPISBAIJ,
1396        MatEqual_MPISBAIJ,
1397        MatGetDiagonal_MPISBAIJ,
1398        MatDiagonalScale_MPISBAIJ,
1399        MatNorm_MPISBAIJ,
1400 /*20*/ MatAssemblyBegin_MPISBAIJ,
1401        MatAssemblyEnd_MPISBAIJ,
1402        MatSetOption_MPISBAIJ,
1403        MatZeroEntries_MPISBAIJ,
1404 /*24*/ 0,
1405        0,
1406        0,
1407        0,
1408        0,
1409 /*29*/ MatSetUpPreallocation_MPISBAIJ,
1410        0,
1411        0,
1412        0,
1413        0,
1414 /*34*/ MatDuplicate_MPISBAIJ,
1415        0,
1416        0,
1417        0,
1418        0,
1419 /*39*/ MatAXPY_MPISBAIJ,
1420        MatGetSubMatrices_MPISBAIJ,
1421        MatIncreaseOverlap_MPISBAIJ,
1422        MatGetValues_MPISBAIJ,
1423        MatCopy_MPISBAIJ,
1424 /*44*/ 0,
1425        MatScale_MPISBAIJ,
1426        0,
1427        0,
1428        0,
1429 /*49*/ 0,
1430        0,
1431        0,
1432        0,
1433        0,
1434 /*54*/ 0,
1435        0,
1436        MatSetUnfactored_MPISBAIJ,
1437        0,
1438        MatSetValuesBlocked_MPISBAIJ,
1439 /*59*/ 0,
1440        0,
1441        0,
1442        0,
1443        0,
1444 /*64*/ 0,
1445        0,
1446        0,
1447        0,
1448        0,
1449 /*69*/ MatGetRowMaxAbs_MPISBAIJ,
1450        0,
1451        0,
1452        0,
1453        0,
1454 /*74*/ 0,
1455        0,
1456        0,
1457        0,
1458        0,
1459 /*79*/ 0,
1460        0,
1461        0,
1462        0,
1463        MatLoad_MPISBAIJ,
1464 /*84*/ 0,
1465        0,
1466        0,
1467        0,
1468        0,
1469 /*89*/ 0,
1470        0,
1471        0,
1472        0,
1473        0,
1474 /*94*/ 0,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*99*/ 0,
1480        0,
1481        0,
1482        0,
1483        0,
1484 /*104*/0,
1485        MatRealPart_MPISBAIJ,
1486        MatImaginaryPart_MPISBAIJ,
1487        MatGetRowUpperTriangular_MPISBAIJ,
1488        MatRestoreRowUpperTriangular_MPISBAIJ
1489 };
1490 
1491 
1492 EXTERN_C_BEGIN
1493 #undef __FUNCT__
1494 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1495 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1496 {
1497   PetscFunctionBegin;
1498   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1499   *iscopy = PETSC_FALSE;
1500   PetscFunctionReturn(0);
1501 }
1502 EXTERN_C_END
1503 
1504 EXTERN_C_BEGIN
1505 #undef __FUNCT__
1506 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1507 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1508 {
1509   Mat_MPISBAIJ   *b;
1510   PetscErrorCode ierr;
1511   PetscInt       i,mbs,Mbs,newbs = PetscAbs(bs);
1512 
1513   PetscFunctionBegin;
1514   if (bs < 0){
1515     ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1516       ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",newbs,&newbs,PETSC_NULL);CHKERRQ(ierr);
1517     ierr = PetscOptionsEnd();CHKERRQ(ierr);
1518     bs   = PetscAbs(bs);
1519   }
1520   if ((d_nnz || o_nnz) && newbs != bs) {
1521     SETERRQ(PETSC_ERR_ARG_WRONG,"Cannot change blocksize from command line if setting d_nnz or o_nnz");
1522   }
1523   bs = newbs;
1524 
1525   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1526   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1527   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1528   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1529 
1530   B->rmap->bs = B->cmap->bs = bs;
1531   ierr = PetscMapSetUp(B->rmap);CHKERRQ(ierr);
1532   ierr = PetscMapSetUp(B->cmap);CHKERRQ(ierr);
1533 
1534   if (d_nnz) {
1535     for (i=0; i<B->rmap->n/bs; i++) {
1536       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1537     }
1538   }
1539   if (o_nnz) {
1540     for (i=0; i<B->rmap->n/bs; i++) {
1541       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1542     }
1543   }
1544 
1545   b   = (Mat_MPISBAIJ*)B->data;
1546   mbs = B->rmap->n/bs;
1547   Mbs = B->rmap->N/bs;
1548   if (mbs*bs != B->rmap->n) {
1549     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap->n,bs);
1550   }
1551 
1552   B->rmap->bs  = bs;
1553   b->bs2 = bs*bs;
1554   b->mbs = mbs;
1555   b->nbs = mbs;
1556   b->Mbs = Mbs;
1557   b->Nbs = Mbs;
1558 
1559   for (i=0; i<=b->size; i++) {
1560     b->rangebs[i] = B->rmap->range[i]/bs;
1561   }
1562   b->rstartbs = B->rmap->rstart/bs;
1563   b->rendbs   = B->rmap->rend/bs;
1564 
1565   b->cstartbs = B->cmap->rstart/bs;
1566   b->cendbs   = B->cmap->rend/bs;
1567 
1568   if (!B->preallocated) {
1569     ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1570     ierr = MatSetSizes(b->A,B->rmap->n,B->cmap->n,B->rmap->n,B->cmap->n);CHKERRQ(ierr);
1571     ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1572     ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1573     ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1574     ierr = MatSetSizes(b->B,B->rmap->n,B->cmap->N,B->rmap->n,B->cmap->N);CHKERRQ(ierr);
1575     ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1576     ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1577     /* build cache for off array entries formed */
1578     ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1579   }
1580 
1581   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1582   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1583   B->preallocated = PETSC_TRUE;
1584   PetscFunctionReturn(0);
1585 }
1586 EXTERN_C_END
1587 
1588 EXTERN_C_BEGIN
1589 #if defined(PETSC_HAVE_MUMPS)
1590 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1591 #endif
1592 #if defined(PETSC_HAVE_SPOOLES)
1593 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1594 #endif
1595 #if defined(PETSC_HAVE_PASTIX)
1596 extern PetscErrorCode MatGetFactor_mpisbaij_pastix(Mat,MatFactorType,Mat*);
1597 #endif
1598 EXTERN_C_END
1599 
1600 /*MC
1601    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1602    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1603 
1604    Options Database Keys:
1605 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1606 
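   Example (a minimal sketch; the sizes and preallocation values are placeholders):
.vb
   MatCreate(comm,&A);
   MatSetSizes(A,m,n,M,N);
   MatSetFromOptions(A);   /* -mat_type mpisbaij selects this format at runtime */
   MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
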
1607   Level: beginner
1608 
1609 .seealso: MatCreateMPISBAIJ
1610 M*/
1611 
1612 EXTERN_C_BEGIN
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatCreate_MPISBAIJ"
1615 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1616 {
1617   Mat_MPISBAIJ   *b;
1618   PetscErrorCode ierr;
1619   PetscTruth     flg;
1620 
1621   PetscFunctionBegin;
1622 
1623   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1624   B->data = (void*)b;
1625   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1626 
1627   B->ops->destroy    = MatDestroy_MPISBAIJ;
1628   B->ops->view       = MatView_MPISBAIJ;
1629   B->mapping         = 0;
1630   B->assembled       = PETSC_FALSE;
1631 
1632   B->insertmode = NOT_SET_VALUES;
1633   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1634   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1635 
1636   /* build local table of row and column ownerships */
1637   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1638 
1639   /* build cache for off array entries formed */
1640   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1641   b->donotstash  = PETSC_FALSE;
1642   b->colmap      = PETSC_NULL;
1643   b->garray      = PETSC_NULL;
1644   b->roworiented = PETSC_TRUE;
1645 
1646   /* stuff used in block assembly */
1647   b->barray       = 0;
1648 
1649   /* stuff used for matrix vector multiply */
1650   b->lvec         = 0;
1651   b->Mvctx        = 0;
1652   b->slvec0       = 0;
1653   b->slvec0b      = 0;
1654   b->slvec1       = 0;
1655   b->slvec1a      = 0;
1656   b->slvec1b      = 0;
1657   b->sMvctx       = 0;
1658 
1659   /* stuff for MatGetRow() */
1660   b->rowindices   = 0;
1661   b->rowvalues    = 0;
1662   b->getrowactive = PETSC_FALSE;
1663 
1664   /* hash table stuff */
1665   b->ht           = 0;
1666   b->hd           = 0;
1667   b->ht_size      = 0;
1668   b->ht_flag      = PETSC_FALSE;
1669   b->ht_fact      = 0;
1670   b->ht_total_ct  = 0;
1671   b->ht_insert_ct = 0;
1672 
1673   b->in_loc       = 0;
1674   b->v_loc        = 0;
1675   b->n_loc        = 0;
1676   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1677     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1678     if (flg) {
1679       PetscReal fact = 1.39;
1680       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1681       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1682       if (fact <= 1.0) fact = 1.39;
1683       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1684       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1685     }
1686   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1687 
1688 #if defined(PETSC_HAVE_PASTIX)
1689   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpisbaij_pastix_C",
1690 					   "MatGetFactor_mpisbaij_pastix",
1691 					   MatGetFactor_mpisbaij_pastix);CHKERRQ(ierr);
1692 #endif
1693 #if defined(PETSC_HAVE_MUMPS)
1694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpisbaij_mumps_C",
1695                                      "MatGetFactor_mpisbaij_mumps",
1696                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1697 #endif
1698 #if defined(PETSC_HAVE_SPOOLES)
1699   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpisbaij_spooles_C",
1700                                      "MatGetFactor_mpisbaij_spooles",
1701                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1702 #endif
1703   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1704                                      "MatStoreValues_MPISBAIJ",
1705                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1706   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1707                                      "MatRetrieveValues_MPISBAIJ",
1708                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1709   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1710                                      "MatGetDiagonalBlock_MPISBAIJ",
1711                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1712   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1713                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1714                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1715   B->symmetric                  = PETSC_TRUE;
1716   B->structurally_symmetric     = PETSC_TRUE;
1717   B->symmetric_set              = PETSC_TRUE;
1718   B->structurally_symmetric_set = PETSC_TRUE;
1719   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1720   PetscFunctionReturn(0);
1721 }
1722 EXTERN_C_END
1723 
1724 /*MC
1725    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1726 
1727    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1728    and MATMPISBAIJ otherwise.
1729 
1730    Options Database Keys:
1731 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1732 
1733   Level: beginner
1734 
1735 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1736 M*/
1737 
1738 EXTERN_C_BEGIN
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatCreate_SBAIJ"
1741 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1742 {
1743   PetscErrorCode ierr;
1744   PetscMPIInt    size;
1745 
1746   PetscFunctionBegin;
1747   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1748   if (size == 1) {
1749     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1750   } else {
1751     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1752   }
1753   PetscFunctionReturn(0);
1754 }
1755 EXTERN_C_END
1756 
1757 #undef __FUNCT__
1758 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1759 /*@C
1760    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1761    the user should preallocate the matrix storage by setting the parameters
1762    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1763    performance can be increased by more than a factor of 50.
1764 
1765    Collective on Mat
1766 
1767    Input Parameters:
1768 +  A - the matrix
1769 .  bs   - size of block
1770 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1771            submatrix  (same for all local rows)
1772 .  d_nnz - array containing the number of block nonzeros in the various block rows
1773            in the upper triangular and diagonal part of the diagonal portion of the local
1774            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1775            for the diagonal entry even if it is zero.
1776 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1777            submatrix (same for all local rows).
1778 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1779            off-diagonal portion of the local submatrix (possibly different for
1780            each block row) or PETSC_NULL.
1781 
1782 
1783    Options Database Keys:
1784 .   -mat_no_unroll - uses code that does not unroll the loops in the
1785                      block calculations (much slower)
1786 .   -mat_block_size - size of the blocks to use
1787 
1788    Notes:
1789 
1790    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1791    then it must be used on all processors that share the object for that argument.
1792 
1793    If the *_nnz parameter is given then the *_nz parameter is ignored.
1794 
1795    Storage Information:
1796    For a square global matrix we define each processor's diagonal portion
1797    to be its local rows and the corresponding columns (a square submatrix);
1798    each processor's off-diagonal portion encompasses the remainder of the
1799    local matrix (a rectangular submatrix).
1800 
1801    The user can specify preallocated storage for the diagonal part of
1802    the local submatrix with either d_nz or d_nnz (not both).  Set
1803    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1804    memory allocation.  Likewise, specify preallocated storage for the
1805    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1806 
1807    You can call MatGetInfo() to get information on how effective the preallocation was;
1808    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1809    You can also run with the option -info and look for messages with the string
1810    malloc in them to see if additional memory allocation was needed.
1811 
1812    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1813    the figure below we depict these three local rows and all columns (0-11).
1814 
1815 .vb
1816            0 1 2 3 4 5 6 7 8 9 10 11
1817           -------------------
1818    row 3  |  o o o d d d o o o o o o
1819    row 4  |  o o o d d d o o o o o o
1820    row 5  |  o o o d d d o o o o o o
1821           -------------------
1822 .ve
1823 
1824    Thus, any entries in the d locations are stored in the d (diagonal)
1825    submatrix, and any entries in the o locations are stored in the
1826    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1827    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1828 
1829    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1830    plus the diagonal part of the d matrix,
1831    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1832    In general, for PDE problems in which most nonzeros are near the diagonal,
1833    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1834    or you will get TERRIBLE performance; see the users' manual chapter on
1835    matrices.
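
   For example, the three pictured rows with bs=1 could be preallocated as below
   (a sketch: only the stored upper triangular entries are counted):

.vb
   PetscInt d_nnz[3] = {3,2,1};   /* row 3: cols 3,4,5; row 4: cols 4,5; row 5: col 5 */
   PetscInt o_nnz[3] = {6,6,6};   /* cols 6..11 in each of rows 3,4,5 */
   MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve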
1836 
1837    Level: intermediate
1838 
1839 .keywords: matrix, block, aij, compressed row, sparse, parallel
1840 
1841 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1842 @*/
1843 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1844 {
1845   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1846 
1847   PetscFunctionBegin;
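  /* dispatch through the function composed by the concrete matrix type, so this call
     is a no-op for types that do not support SBAIJ preallocation */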
1848   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1849   if (f) {
1850     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1851   }
1852   PetscFunctionReturn(0);
1853 }
1854 
1855 #undef __FUNCT__
1856 #define __FUNCT__ "MatCreateMPISBAIJ"
1857 /*@C
1858    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1859    (block compressed row).  For good matrix assembly performance
1860    the user should preallocate the matrix storage by setting the parameters
1861    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1862    performance can be increased by more than a factor of 50.
1863 
1864    Collective on MPI_Comm
1865 
1866    Input Parameters:
1867 +  comm - MPI communicator
1868 .  bs   - size of block
1869 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1870            This value should be the same as the local size used in creating the
1871            y vector for the matrix-vector product y = Ax.
1872 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1873            This value should be the same as the local size used in creating the
1874            x vector for the matrix-vector product y = Ax.
1875 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1876 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1877 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1878            submatrix  (same for all local rows)
1879 .  d_nnz - array containing the number of block nonzeros in the various block rows
1880            in the upper triangular and diagonal part of the diagonal portion of the local
1881            submatrix (possibly different for each block row) or PETSC_NULL.
1882            You must leave room for the diagonal entry even if it is zero.
1883 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1884            submatrix (same for all local rows).
1885 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1886            off-diagonal portion of the local submatrix (possibly different for
1887            each block row) or PETSC_NULL.
1888 
1889    Output Parameter:
1890 .  A - the matrix
1891 
1892    Options Database Keys:
1893 .   -mat_no_unroll - uses code that does not unroll the loops in the
1894                      block calculations (much slower)
1895 .   -mat_block_size - size of the blocks to use
1896 .   -mat_mpi - use the parallel matrix data structures even on one processor
1897                (defaults to using SeqSBAIJ format on one processor)
1898 
1899    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1900    MatXXXXSetPreallocation() paradigm instead of this routine directly.
1901    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1902 
1903    Notes:
1904    The number of rows and columns must be divisible by blocksize.
1905    This matrix type does not support complex Hermitian operations.
1906 
1907    The user MUST specify either the local or global matrix dimensions
1908    (possibly both).
1909 
1910    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1911    then it must be used on all processors that share the object for that argument.
1912 
1913    If the *_nnz parameter is given then the *_nz parameter is ignored.
1914 
1915    Storage Information:
1916    For a square global matrix we define each processor's diagonal portion
1917    to be its local rows and the corresponding columns (a square submatrix);
1918    each processor's off-diagonal portion encompasses the remainder of the
1919    local matrix (a rectangular submatrix).
1920 
1921    The user can specify preallocated storage for the diagonal part of
1922    the local submatrix with either d_nz or d_nnz (not both).  Set
1923    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1924    memory allocation.  Likewise, specify preallocated storage for the
1925    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1926 
1927    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1928    the figure below we depict these three local rows and all columns (0-11).
1929 
1930 .vb
1931            0 1 2 3 4 5 6 7 8 9 10 11
1932           -------------------
1933    row 3  |  o o o d d d o o o o o o
1934    row 4  |  o o o d d d o o o o o o
1935    row 5  |  o o o d d d o o o o o o
1936           -------------------
1937 .ve
1938 
1939    Thus, any entries in the d locations are stored in the d (diagonal)
1940    submatrix, and any entries in the o locations are stored in the
1941    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1942    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1943 
1944    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1945    plus the diagonal part of the d matrix,
1946    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1947    In general, for PDE problems in which most nonzeros are near the diagonal,
1948    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1949    or you will get TERRIBLE performance; see the users' manual chapter on
1950    matrices.
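
   For example, a minimal creation sketch with constant per-row preallocation
   (the sizes here are placeholders):

.vb
   Mat A;
   MatCreateMPISBAIJ(comm,bs,m,n,PETSC_DETERMINE,PETSC_DETERMINE,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);
.ve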
1951 
1952    Level: intermediate
1953 
1954 .keywords: matrix, block, aij, compressed row, sparse, parallel
1955 
1956 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1957 @*/
1958 
1959 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1960 {
1961   PetscErrorCode ierr;
1962   PetscMPIInt    size;
1963 
1964   PetscFunctionBegin;
1965   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1966   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1967   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1968   if (size > 1) {
1969     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1970     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1971   } else {
1972     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1973     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1974   }
1975   PetscFunctionReturn(0);
1976 }
1977 
1978 
1979 #undef __FUNCT__
1980 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1981 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1982 {
1983   Mat            mat;
1984   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1985   PetscErrorCode ierr;
1986   PetscInt       len=0,nt,bs=matin->rmap->bs,mbs=oldmat->mbs;
1987   PetscScalar    *array;
1988 
1989   PetscFunctionBegin;
1990   *newmat       = 0;
1991   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
1992   ierr = MatSetSizes(mat,matin->rmap->n,matin->cmap->n,matin->rmap->N,matin->cmap->N);CHKERRQ(ierr);
1993   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
1994   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1995   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->rmap,mat->rmap);CHKERRQ(ierr);
1996   ierr = PetscMapCopy(((PetscObject)matin)->comm,matin->cmap,mat->cmap);CHKERRQ(ierr);
1997 
1998   mat->factor       = matin->factor;
1999   mat->preallocated = PETSC_TRUE;
2000   mat->assembled    = PETSC_TRUE;
2001   mat->insertmode   = NOT_SET_VALUES;
2002 
2003   a = (Mat_MPISBAIJ*)mat->data;
2004   a->bs2   = oldmat->bs2;
2005   a->mbs   = oldmat->mbs;
2006   a->nbs   = oldmat->nbs;
2007   a->Mbs   = oldmat->Mbs;
2008   a->Nbs   = oldmat->Nbs;
2009 
2010 
2011   a->size         = oldmat->size;
2012   a->rank         = oldmat->rank;
2013   a->donotstash   = oldmat->donotstash;
2014   a->roworiented  = oldmat->roworiented;
2015   a->rowindices   = 0;
2016   a->rowvalues    = 0;
2017   a->getrowactive = PETSC_FALSE;
2018   a->barray       = 0;
2019   a->rstartbs    = oldmat->rstartbs;
2020   a->rendbs      = oldmat->rendbs;
2021   a->cstartbs    = oldmat->cstartbs;
2022   a->cendbs      = oldmat->cendbs;
2023 
2024   /* hash table stuff */
2025   a->ht           = 0;
2026   a->hd           = 0;
2027   a->ht_size      = 0;
2028   a->ht_flag      = oldmat->ht_flag;
2029   a->ht_fact      = oldmat->ht_fact;
2030   a->ht_total_ct  = 0;
2031   a->ht_insert_ct = 0;
2032 
2033   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2034   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2035   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap->bs,&mat->bstash);CHKERRQ(ierr);
2036   if (oldmat->colmap) {
2037 #if defined (PETSC_USE_CTABLE)
2038     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2039 #else
2040     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2041     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2042     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2043 #endif
2044   } else a->colmap = 0;
2045 
2046   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2047     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2048     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2049     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2050   } else a->garray = 0;
2051 
2052   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2053   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2054   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2055   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2056 
2057   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2058   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2059   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2060   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2061 
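  /* slvec1a/slvec1b (and slvec0b) are not independent vectors but views into the
     arrays of slvec1/slvec0: the first bs*mbs entries form the local part and the
     remaining nt-bs*mbs entries the ghost part */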
2062   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2063   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2064   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2065   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2066   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2067   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2068   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2069   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2070   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2071   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2072   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2073   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2074   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2075 
2076   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2077   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2078   a->sMvctx = oldmat->sMvctx;
2079   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2080 
2081   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2082   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2083   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2084   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2085   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2086   *newmat = mat;
2087   PetscFunctionReturn(0);
2088 }
2089 
2090 #include "petscsys.h"
2091 
2092 #undef __FUNCT__
2093 #define __FUNCT__ "MatLoad_MPISBAIJ"
2094 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, const MatType type,Mat *newmat)
2095 {
2096   Mat            A;
2097   PetscErrorCode ierr;
2098   PetscInt       i,nz,j,rstart,rend;
2099   PetscScalar    *vals,*buf;
2100   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2101   MPI_Status     status;
2102   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2103   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2104   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2105   PetscInt       bs=1,Mbs,mbs,extra_rows;
2106   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2107   PetscInt       dcount,kmax,k,nzcount,tmp;
2108   int            fd;
2109 
2110   PetscFunctionBegin;
2111   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2112     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2113   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2114 
2115   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2116   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2117   if (!rank) {
2118     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2119     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2120     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2121     if (header[3] < 0) {
2122       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2123     }
2124   }
2125 
2126   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2127   M = header[1]; N = header[2];
2128 
2129   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2130 
2131   /*
2132      This code adds extra rows to make sure the number of rows is
2133      divisible by the blocksize
2134   */
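  /* e.g. (hypothetical sizes) M=10 with bs=4: Mbs=10/4=2 and extra_rows=4-10+8=2,
     so Mbs becomes 3 and the two padded rows are later given unit diagonal entries */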
2135   Mbs        = M/bs;
2136   extra_rows = bs - M + bs*(Mbs);
2137   if (extra_rows == bs) extra_rows = 0;
2138   else                  Mbs++;
2139   if (extra_rows && !rank) {
2140     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2141   }
2142 
2143   /* determine ownership of all rows */
2144   mbs        = Mbs/size + ((Mbs % size) > rank);
2145   m          = mbs*bs;
2146   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2147   browners   = rowners + size + 1;
2148   mmbs       = PetscMPIIntCast(mbs);
2149   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2150   rowners[0] = 0;
2151   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2152   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2153   rstart = rowners[rank];
2154   rend   = rowners[rank+1];
2155 
2156   /* distribute row lengths to all processors */
2157   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2158   if (!rank) {
2159     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2160     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2161     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2162     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2163     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2164     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2165     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2166   } else {
2167     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2168   }
2169 
2170   if (!rank) {   /* procs[0] */
2171     /* calculate the number of nonzeros on each processor */
2172     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2173     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2174     for (i=0; i<size; i++) {
2175       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2176         procsnz[i] += rowlengths[j];
2177       }
2178     }
2179     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2180 
2181     /* determine max buffer needed and allocate it */
2182     maxnz = 0;
2183     for (i=0; i<size; i++) {
2184       maxnz = PetscMax(maxnz,procsnz[i]);
2185     }
2186     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2187 
2188     /* read in my part of the matrix column indices  */
2189     nz     = procsnz[0];
2190     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2191     mycols = ibuf;
2192     if (size == 1)  nz -= extra_rows;
2193     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2194     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2195 
2196     /* read in the parts for all other processors (except the last) and ship them off */
2197     for (i=1; i<size-1; i++) {
2198       nz   = procsnz[i];
2199       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2200       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2201     }
2202     /* read in the stuff for the last proc */
2203     if (size != 1) {
2204       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2205       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2206       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2207       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2208     }
2209     ierr = PetscFree(cols);CHKERRQ(ierr);
2210   } else {  /* procs[i], i>0 */
2211     /* determine buffer space needed for message */
2212     nz = 0;
2213     for (i=0; i<m; i++) {
2214       nz += locrowlens[i];
2215     }
2216     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2217     mycols = ibuf;
2218     /* receive message of column indices */
2219     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2220     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2221     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2222   }
2223 
2224   /* loop over local rows, determining number of off diagonal entries */
2225   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2226   odlens   = dlens + (rend-rstart);
2227   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2228   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2229   masked1  = mask    + Mbs;
2230   masked2  = masked1 + Mbs;
2231   rowcount = 0; nzcount = 0;
2232   for (i=0; i<mbs; i++) {
2233     dcount  = 0;
2234     odcount = 0;
2235     for (j=0; j<bs; j++) {
2236       kmax = locrowlens[rowcount];
2237       for (k=0; k<kmax; k++) {
2238         tmp = mycols[nzcount++]/bs; /* block col. index */
2239         if (!mask[tmp]) {
2240           mask[tmp] = 1;
2241           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2242           else masked1[dcount++] = tmp; /* entry in diag portion */
2243         }
2244       }
2245       rowcount++;
2246     }
2247 
2248     dlens[i]  = dcount;  /* d_nzz[i] */
2249     odlens[i] = odcount; /* o_nzz[i] */
2250 
2251     /* zero out the mask elements we set */
2252     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2253     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2254   }
2255 
2256   /* create our matrix */
2257   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2258   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2259   ierr = MatSetType(A,type);CHKERRQ(ierr);
2260   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2261 
2262   if (!rank) {
2263     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2264     /* read in my part of the matrix numerical values  */
2265     nz = procsnz[0];
2266     vals = buf;
2267     mycols = ibuf;
2268     if (size == 1)  nz -= extra_rows;
2269     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2270     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2271 
2272     /* insert into matrix */
2273     jj      = rstart*bs;
2274     for (i=0; i<m; i++) {
2275       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2276       mycols += locrowlens[i];
2277       vals   += locrowlens[i];
2278       jj++;
2279     }
2280 
2281     /* read in other processors (except the last one) and ship out */
2282     for (i=1; i<size-1; i++) {
2283       nz   = procsnz[i];
2284       vals = buf;
2285       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2286       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2287     }
2288     /* the last proc */
2289     if (size != 1){
2290       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2291       vals = buf;
2292       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2293       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2294       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2295     }
2296     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2297 
2298   } else {
2299     /* receive numeric values */
2300     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2301 
2302     /* receive message of values */
2303     vals   = buf;
2304     mycols = ibuf;
2305     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2306     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2307     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2308 
2309     /* insert into matrix */
2310     jj      = rstart*bs;
2311     for (i=0; i<m; i++) {
2312       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2313       mycols += locrowlens[i];
2314       vals   += locrowlens[i];
2315       jj++;
2316     }
2317   }
2318 
2319   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2320   ierr = PetscFree(buf);CHKERRQ(ierr);
2321   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2322   ierr = PetscFree(rowners);CHKERRQ(ierr);
2323   ierr = PetscFree(dlens);CHKERRQ(ierr);
2324   ierr = PetscFree(mask);CHKERRQ(ierr);
2325   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2326   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2327   *newmat = A;
2328   PetscFunctionReturn(0);
2329 }
2330 
2331 #undef __FUNCT__
2332 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2333 /*XXXXX@
2334    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2335 
2336    Input Parameters:
2337 .  mat  - the matrix
2338 .  fact - factor
2339 
2340    Collective on Mat
2341 
2342    Level: advanced
2343 
2344   Notes:
2345    This can also be set by the command line option: -mat_use_hash_table fact
2346 
2347 .keywords: matrix, hashtable, factor, HT
2348 
2349 .seealso: MatSetOption()
2350 @XXXXX*/
2351 
2352 
2353 #undef __FUNCT__
2354 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2355 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2356 {
2357   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2358   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2359   PetscReal      atmp;
2360   PetscReal      *work,*svalues,*rvalues;
2361   PetscErrorCode ierr;
2362   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2363   PetscMPIInt    rank,size;
2364   PetscInt       *rowners_bs,dest,count,source;
2365   PetscScalar    *va;
2366   MatScalar      *ba;
2367   MPI_Status     stat;
2368 
2369   PetscFunctionBegin;
2370   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2371   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2372   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2373 
2374   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2375   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2376 
2377   bs   = A->rmap->bs;
2378   mbs  = a->mbs;
2379   Mbs  = a->Mbs;
2380   ba   = b->a;
2381   bi   = b->i;
2382   bj   = b->j;
2383 
2384   /* find ownerships */
2385   rowners_bs = A->rmap->range;
2386 
2387   /* each proc creates an array to be distributed */
2388   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2389   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2390 
2391   /* row_max for B */
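  /* only the upper triangle is stored, so a stored entry of B in local row 'row' and
     global column 'col' also represents the transposed entry (col,row): the local row
     maxima are updated in va[] while the transposed contributions are accumulated in
     work[] and shipped below to the processes owning those rows */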
2392   if (rank != size-1){
2393     for (i=0; i<mbs; i++) {
2394       ncols = bi[1] - bi[0]; bi++;
2395       brow  = bs*i;
2396       for (j=0; j<ncols; j++){
2397         bcol = bs*(*bj);
2398         for (kcol=0; kcol<bs; kcol++){
2399           col = bcol + kcol;                 /* local col index */
2400           col += rowners_bs[rank+1];      /* global col index */
2401           for (krow=0; krow<bs; krow++){
2402             atmp = PetscAbsScalar(*ba); ba++;
2403             row = brow + krow;    /* local row index */
2404             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2405             if (work[col] < atmp) work[col] = atmp;
2406           }
2407         }
2408         bj++;
2409       }
2410     }
2411 
2412     /* send values to its owners */
2413     for (dest=rank+1; dest<size; dest++){
2414       svalues = work + rowners_bs[dest];
2415       count   = rowners_bs[dest+1]-rowners_bs[dest];
2416       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2417     }
2418   }
2419 
2420   /* receive values */
2421   if (rank){
2422     rvalues = work;
2423     count   = rowners_bs[rank+1]-rowners_bs[rank];
2424     for (source=0; source<rank; source++){
2425       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2426       /* process values */
2427       for (i=0; i<count; i++){
2428         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2429       }
2430     }
2431   }
2432 
2433   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2434   ierr = PetscFree(work);CHKERRQ(ierr);
2435   PetscFunctionReturn(0);
2436 }
2437 
2438 #undef __FUNCT__
2439 #define __FUNCT__ "MatRelax_MPISBAIJ"
2440 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2441 {
2442   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2443   PetscErrorCode ierr;
2444   PetscInt       mbs=mat->mbs,bs=matin->rmap->bs;
2445   PetscScalar    *x,*b,*ptr,zero=0.0;
2446   Vec            bb1;
2447 
2448   PetscFunctionBegin;
2449   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2450   if (bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2451 
2452   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2453     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2454       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2455       its--;
2456     }
2457 
2458     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
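      /* each global iteration forms bb1 = bb - A_off*xx, where A_off is the part of A
         outside the local diagonal block: since only the upper triangle is stored,
         A_off*xx combines B*x_ghost (upper part) with the scattered contributions of
         B^T*xx (lower part); the local diagonal block is then relaxed with a symmetric
         sweep */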
2459     while (its--){
2460 
2461       /* lower triangular part: slvec0b = - B^T*xx */
2462       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2463 
2464       /* copy xx into slvec0a */
2465       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2466       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2467       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2468       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2469 
2470       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2471 
2472       /* copy bb into slvec1a */
2473       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2474       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2475       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2476       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2477 
2478       /* set slvec1b = 0 */
2479       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2480 
2481       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2482       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2483       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2484       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2485 
2486       /* upper triangular part: bb1 = bb1 - B*x */
2487       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2488 
2489       /* local diagonal sweep */
2490       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2491     }
2492     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2493   } else {
2494     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2495   }
2496   PetscFunctionReturn(0);
2497 }
2498 
2499 #undef __FUNCT__
2500 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2501 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2502 {
2503   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2504   PetscErrorCode ierr;
2505   Vec            lvec1,bb1;
2506 
2507   PetscFunctionBegin;
2508   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2509   if (matin->rmap->bs > 1) SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2510 
2511   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2512     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2513       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2514       its--;
2515     }
2516 
2517     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2518     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
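      /* same sweep as MatRelax_MPISBAIJ, but bb1 = bb - A_off*xx is assembled with two
         scatters on Mvctx (forward to gather ghost x for the upper part, reverse to
         return the B^T*xx contributions for the lower part) instead of the combined
         sMvctx */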
2519     while (its--){
2520       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2521 
2522       /* lower triangular part: bb1 = bb - B^T*xx */
2523       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2524       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2525 
2526       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2527       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2528       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2529 
2530       /* upper triangular part: bb1 = bb1 - B*x */
2531       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2532       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2533 
2534       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2535 
2536       /* diagonal sweep */
2537       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2538     }
2539     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2540     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2541   } else {
2542     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2543   }
2544   PetscFunctionReturn(0);
2545 }
2546 
2547