xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 5c9eb25f753630cfd361293e05e7358a00a954ac)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 EXTERN_C_BEGIN
24 #undef __FUNCT__
25 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
26 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
27 {
28   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
29   PetscErrorCode ierr;
30 
31   PetscFunctionBegin;
32   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
33   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
34   PetscFunctionReturn(0);
35 }
36 EXTERN_C_END
37 
38 EXTERN_C_BEGIN
39 #undef __FUNCT__
40 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
41 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
42 {
43   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
44   PetscErrorCode ierr;
45 
46   PetscFunctionBegin;
47   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
48   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
49   PetscFunctionReturn(0);
50 }
51 EXTERN_C_END
52 
53 
54 #define CHUNKSIZE  10
55 
56 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
57 { \
58  \
59     brow = row/bs;  \
60     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
61     rmax = aimax[brow]; nrow = ailen[brow]; \
62       bcol = col/bs; \
63       ridx = row % bs; cidx = col % bs; \
64       low = 0; high = nrow; \
65       while (high-low > 3) { \
66         t = (low+high)/2; \
67         if (rp[t] > bcol) high = t; \
68         else              low  = t; \
69       } \
70       for (_i=low; _i<high; _i++) { \
71         if (rp[_i] > bcol) break; \
72         if (rp[_i] == bcol) { \
73           bap  = ap +  bs2*_i + bs*cidx + ridx; \
74           if (addv == ADD_VALUES) *bap += value;  \
75           else                    *bap  = value;  \
76           goto a_noinsert; \
77         } \
78       } \
79       if (a->nonew == 1) goto a_noinsert; \
80       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
81       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
82       N = nrow++ - 1;  \
83       /* shift up all the later entries in this row */ \
84       for (ii=N; ii>=_i; ii--) { \
85         rp[ii+1] = rp[ii]; \
86         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
87       } \
88       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
89       rp[_i]                      = bcol;  \
90       ap[bs2*_i + bs*cidx + ridx] = value;  \
91       a_noinsert:; \
92     ailen[brow] = nrow; \
93 }
94 
95 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
96 { \
97     brow = row/bs;  \
98     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
99     rmax = bimax[brow]; nrow = bilen[brow]; \
100       bcol = col/bs; \
101       ridx = row % bs; cidx = col % bs; \
102       low = 0; high = nrow; \
103       while (high-low > 3) { \
104         t = (low+high)/2; \
105         if (rp[t] > bcol) high = t; \
106         else              low  = t; \
107       } \
108       for (_i=low; _i<high; _i++) { \
109         if (rp[_i] > bcol) break; \
110         if (rp[_i] == bcol) { \
111           bap  = ap +  bs2*_i + bs*cidx + ridx; \
112           if (addv == ADD_VALUES) *bap += value;  \
113           else                    *bap  = value;  \
114           goto b_noinsert; \
115         } \
116       } \
117       if (b->nonew == 1) goto b_noinsert; \
118       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
119       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
120       N = nrow++ - 1;  \
121       /* shift up all the later entries in this row */ \
122       for (ii=N; ii>=_i; ii--) { \
123         rp[ii+1] = rp[ii]; \
124         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
125       } \
126       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
127       rp[_i]                      = bcol;  \
128       ap[bs2*_i + bs*cidx + ridx] = value;  \
129       b_noinsert:; \
130     bilen[brow] = nrow; \
131 }
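
/*
   Note: the two macros above implement the same insertion scheme for the
   diagonal (A) and off-diagonal (B) parts: a short binary search narrows the
   sorted list of block columns in block row row/bs, a linear scan finds the
   block or the insertion point, and on a miss all later blocks are shifted up
   to open a slot.  A minimal standalone sketch of the pattern, simplified to
   scalar entries (illustrative only, not the macro code itself):

     PetscInt low = 0, high = nrow, t, i, k;
     while (high - low > 3) {                  binary search
       t = (low + high)/2;
       if (rp[t] > bcol) high = t;
       else              low  = t;
     }
     for (i=low; i<high; i++) {                linear scan of the small window
       if (rp[i] > bcol) break;
       if (rp[i] == bcol) { ap[i] = value; goto done; }
     }
     for (k=nrow-1; k>=i; k--) {               shift up to open slot i
       rp[k+1] = rp[k]; ap[k+1] = ap[k];
     }
     rp[i] = bcol; ap[i] = value; nrow++;
     done: ;
*/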
132 
133 /* Only add/insert a(i,j) with i<=j (blocks).
134    Any a(i,j) with i>j input by the user is ignored.
135 */
136 #undef __FUNCT__
137 #define __FUNCT__ "MatSetValues_MPISBAIJ"
138 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
139 {
140   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
141   MatScalar      value;
142   PetscTruth     roworiented = baij->roworiented;
143   PetscErrorCode ierr;
144   PetscInt       i,j,row,col;
145   PetscInt       rstart_orig=mat->rmap.rstart;
146   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
147   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
148 
149   /* Some variables required by the macros */
150   Mat            A = baij->A;
151   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
152   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
153   MatScalar      *aa=a->a;
154 
155   Mat            B = baij->B;
156   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
157   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
158   MatScalar     *ba=b->a;
159 
160   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
161   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
162   MatScalar     *ap,*bap;
163 
164   /* for stash */
165   PetscInt      n_loc, *in_loc = PETSC_NULL;
166   MatScalar     *v_loc = PETSC_NULL;
167 
168   PetscFunctionBegin;
169 
170   if (!baij->donotstash){
171     if (n > baij->n_loc) {
172       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
173       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
174       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
175       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
176       baij->n_loc = n;
177     }
178     in_loc = baij->in_loc;
179     v_loc  = baij->v_loc;
180   }
181 
182   for (i=0; i<m; i++) {
183     if (im[i] < 0) continue;
184 #if defined(PETSC_USE_DEBUG)
185     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
186 #endif
187     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
188       row = im[i] - rstart_orig;              /* local row index */
189       for (j=0; j<n; j++) {
190         if (im[i]/bs > in[j]/bs){
191           if (a->ignore_ltriangular){
192             continue;    /* ignore lower triangular blocks */
193           } else {
194             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for the sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
195           }
196         }
197         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
198           col = in[j] - cstart_orig;          /* local col index */
199           brow = row/bs; bcol = col/bs;
200           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
201           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
202           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
203           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
204         } else if (in[j] < 0) continue;
205 #if defined(PETSC_USE_DEBUG)
206         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
207 #endif
208         else {  /* off-diag entry (B) */
209           if (mat->was_assembled) {
210             if (!baij->colmap) {
211               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
212             }
213 #if defined (PETSC_USE_CTABLE)
214             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
215             col  = col - 1;
216 #else
217             col = baij->colmap[in[j]/bs] - 1;
218 #endif
219             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
220               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
221               col =  in[j];
222               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
223               B = baij->B;
224               b = (Mat_SeqBAIJ*)(B)->data;
225               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
226               ba=b->a;
227             } else col += in[j]%bs;
228           } else col = in[j];
229           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
230           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
231           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
232         }
233       }
234     } else {  /* off processor entry */
235       if (!baij->donotstash) {
236         n_loc = 0;
237         for (j=0; j<n; j++){
238           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
239           in_loc[n_loc] = in[j];
240           if (roworiented) {
241             v_loc[n_loc] = v[i*n+j];
242           } else {
243             v_loc[n_loc] = v[j*m+i];
244           }
245           n_loc++;
246         }
247         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
248       }
249     }
250   }
251   PetscFunctionReturn(0);
252 }
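
/*
   A minimal usage sketch for the routine above (mat is assumed to be a
   MATMPISBAIJ matrix created elsewhere): only entries whose block row is <=
   the block column may be passed; lower triangular entries are dropped when
   MAT_IGNORE_LOWER_TRIANGULAR is set and raise an error otherwise.

     PetscInt    i = 2, j = 5;              global indices with i/bs <= j/bs
     PetscScalar v = 1.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/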
253 
254 #undef __FUNCT__
255 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
256 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
257 {
258   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
259   const MatScalar *value;
260   MatScalar       *barray=baij->barray;
261   PetscTruth      roworiented = baij->roworiented;
262   PetscErrorCode  ierr;
263   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
264   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
265   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
266 
267   PetscFunctionBegin;
268   if(!barray) {
269     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
270     baij->barray = barray;
271   }
272 
273   if (roworiented) {
274     stepval = (n-1)*bs;
275   } else {
276     stepval = (m-1)*bs;
277   }
278   for (i=0; i<m; i++) {
279     if (im[i] < 0) continue;
280 #if defined(PETSC_USE_DEBUG)
281     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
282 #endif
283     if (im[i] >= rstart && im[i] < rend) {
284       row = im[i] - rstart;
285       for (j=0; j<n; j++) {
286         /* If n == 1 (roworiented) or m == 1 (!roworiented), a copy is not required */
287         if ((roworiented) && (n == 1)) {
288           barray = (MatScalar*) v + i*bs2;
289         } else if((!roworiented) && (m == 1)) {
290           barray = (MatScalar*) v + j*bs2;
291         } else { /* Here a copy is required */
292           if (roworiented) {
293             value = v + i*(stepval+bs)*bs + j*bs;
294           } else {
295             value = v + j*(stepval+bs)*bs + i*bs;
296           }
297           for (ii=0; ii<bs; ii++,value+=stepval) {
298             for (jj=0; jj<bs; jj++) {
299               *barray++  = *value++;
300             }
301           }
302           barray -=bs2;
303         }
304 
305         if (in[j] >= cstart && in[j] < cend){
306           col  = in[j] - cstart;
307           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
308         }
309         else if (in[j] < 0) continue;
310 #if defined(PETSC_USE_DEBUG)
311         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
312 #endif
313         else {
314           if (mat->was_assembled) {
315             if (!baij->colmap) {
316               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
317             }
318 
319 #if defined(PETSC_USE_DEBUG)
320 #if defined (PETSC_USE_CTABLE)
321             { PetscInt data;
322               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
323               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
324             }
325 #else
326             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
327 #endif
328 #endif
329 #if defined (PETSC_USE_CTABLE)
330 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
331             col  = (col - 1)/bs;
332 #else
333             col = (baij->colmap[in[j]] - 1)/bs;
334 #endif
335             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
336               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
337               col =  in[j];
338             }
339           }
340           else col = in[j];
341           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
342         }
343       }
344     } else {
345       if (!baij->donotstash) {
346         if (roworiented) {
347           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
348         } else {
349           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
350         }
351       }
352     }
353   }
354   PetscFunctionReturn(0);
355 }
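
/*
   Usage sketch for the blocked routine above: im[] and in[] are BLOCK indices
   and v holds bs*bs values per block (row-oriented by default).  For bs = 2,
   inserting one upper triangular block at block position (ib,jb), ib <= jb,
   on a matrix mat assumed to be MATMPISBAIJ:

     PetscInt    ib = 0, jb = 3;
     PetscScalar vblk[4] = {1.0,2.0,3.0,4.0};   one 2x2 block
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vblk,INSERT_VALUES);CHKERRQ(ierr);
*/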
356 
357 #undef __FUNCT__
358 #define __FUNCT__ "MatGetValues_MPISBAIJ"
359 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
360 {
361   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
362   PetscErrorCode ierr;
363   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
364   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
365 
366   PetscFunctionBegin;
367   for (i=0; i<m; i++) {
368     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
369     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
370     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
371       row = idxm[i] - bsrstart;
372       for (j=0; j<n; j++) {
373         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
374         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
375         if (idxn[j] >= bscstart && idxn[j] < bscend){
376           col = idxn[j] - bscstart;
377           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
378         } else {
379           if (!baij->colmap) {
380             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381           }
382 #if defined (PETSC_USE_CTABLE)
383           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
384           data --;
385 #else
386           data = baij->colmap[idxn[j]/bs]-1;
387 #endif
388           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
389           else {
390             col  = data + idxn[j]%bs;
391             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
392           }
393         }
394       }
395     } else {
396       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
397     }
398   }
399   PetscFunctionReturn(0);
400 }
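
/*
   Usage sketch: as the SETERRQ above indicates, MatGetValues() on an
   MPISBAIJ matrix only supports rows owned by the calling process:

     PetscInt    rstart,rend,row,col;
     PetscScalar val;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     row  = rstart; col = rstart;             row must be owned locally
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/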
401 
402 #undef __FUNCT__
403 #define __FUNCT__ "MatNorm_MPISBAIJ"
404 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
405 {
406   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
407   PetscErrorCode ierr;
408   PetscReal      sum[2],*lnorm2;
409 
410   PetscFunctionBegin;
411   if (baij->size == 1) {
412     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
413   } else {
414     if (type == NORM_FROBENIUS) {
415       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
416       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
417       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
418       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
419       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
420       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
421       *norm = sqrt(sum[0] + 2*sum[1]);
422       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
423     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
424       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
425       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
426       PetscReal    *rsum,*rsum2,vabs;
427       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
428       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
429       MatScalar    *v;
430 
431       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
432       rsum2 = rsum + mat->cmap.N;
433       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
434       /* Amat */
435       v = amat->a; jj = amat->j;
436       for (brow=0; brow<mbs; brow++) {
437         grow = bs*(rstart + brow);
438         nz = amat->i[brow+1] - amat->i[brow];
439         for (bcol=0; bcol<nz; bcol++){
440           gcol = bs*(rstart + *jj); jj++;
441           for (col=0; col<bs; col++){
442             for (row=0; row<bs; row++){
443               vabs = PetscAbsScalar(*v); v++;
444               rsum[gcol+col] += vabs;
445               /* non-diagonal block */
446               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
447             }
448           }
449         }
450       }
451       /* Bmat */
452       v = bmat->a; jj = bmat->j;
453       for (brow=0; brow<mbs; brow++) {
454         grow = bs*(rstart + brow);
455         nz = bmat->i[brow+1] - bmat->i[brow];
456         for (bcol=0; bcol<nz; bcol++){
457           gcol = bs*garray[*jj]; jj++;
458           for (col=0; col<bs; col++){
459             for (row=0; row<bs; row++){
460               vabs = PetscAbsScalar(*v); v++;
461               rsum[gcol+col] += vabs;
462               rsum[grow+row] += vabs;
463             }
464           }
465         }
466       }
467       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
468       *norm = 0.0;
469       for (col=0; col<mat->cmap.N; col++) {
470         if (rsum2[col] > *norm) *norm = rsum2[col];
471       }
472       ierr = PetscFree(rsum);CHKERRQ(ierr);
473     } else {
474       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
475     }
476   }
477   PetscFunctionReturn(0);
478 }
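
/*
   Note on the Frobenius branch above: sum[0] is the global sum of the squared
   norms of the block-diagonal parts (the SeqSBAIJ norm already accounts for
   the symmetry inside those blocks), while sum[1] sums the squared norms of
   the strictly off-diagonal parts, whose entries occur twice in the full
   symmetric matrix.  Hence

       ||M||_F = sqrt( sum[0] + 2*sum[1] )
*/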
479 
480 #undef __FUNCT__
481 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
482 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
483 {
484   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
485   PetscErrorCode ierr;
486   PetscInt       nstash,reallocs;
487   InsertMode     addv;
488 
489   PetscFunctionBegin;
490   if (baij->donotstash) {
491     PetscFunctionReturn(0);
492   }
493 
494   /* make sure all processes are using the same InsertMode (INSERT_VALUES or ADD_VALUES) */
495   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
496   if (addv == (ADD_VALUES|INSERT_VALUES)) {
497     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processes inserted values while others added");
498   }
499   mat->insertmode = addv; /* in case this processor had no cache */
500 
501   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
502   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
503   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
504   ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
505   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
506   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
507   PetscFunctionReturn(0);
508 }
509 
510 #undef __FUNCT__
511 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
512 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
513 {
514   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
515   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
516   PetscErrorCode ierr;
517   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
518   PetscInt       *row,*col;
519   PetscTruth     other_disassembled;
520   PetscMPIInt    n;
521   PetscTruth     r1,r2,r3;
522   MatScalar      *val;
523   InsertMode     addv = mat->insertmode;
524 
525   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
526   PetscFunctionBegin;
527 
528   if (!baij->donotstash) {
529     while (1) {
530       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
531       if (!flg) break;
532 
533       for (i=0; i<n;) {
534         /* Now identify the consecutive vals belonging to the same row */
535         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
536         if (j < n) ncols = j-i;
537         else       ncols = n-i;
538         /* Now assemble all these values with a single function call */
539         ierr = MatSetValues_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
540         i = j;
541       }
542     }
543     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
544     /* Now process the block-stash. Since the stashed values are column-oriented,
545        clear the roworiented flags before calling MatSetValuesBlocked(), and
546        restore the original flags afterwards */
547     r1 = baij->roworiented;
548     r2 = a->roworiented;
549     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
550     baij->roworiented = PETSC_FALSE;
551     a->roworiented    = PETSC_FALSE;
552     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
553     while (1) {
554       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
555       if (!flg) break;
556 
557       for (i=0; i<n;) {
558         /* Now identify the consecutive vals belonging to the same row */
559         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
560         if (j < n) ncols = j-i;
561         else       ncols = n-i;
562         ierr = MatSetValuesBlocked_MPISBAIJ(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
563         i = j;
564       }
565     }
566     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
567     baij->roworiented = r1;
568     a->roworiented    = r2;
569     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
570   }
571 
572   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
573   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
574 
575   /* determine if any process has disassembled; if so we must
576      also disassemble ourselves, in order that we may reassemble. */
577   /*
578      if the nonzero structure of submatrix B cannot change then we know that
579      no process disassembled, and thus we can skip this step
580   */
581   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
582     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
583     if (mat->was_assembled && !other_disassembled) {
584       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
585     }
586   }
587 
588   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
589     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
590   }
591   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
592   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
593   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
594 
595   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
596   baij->rowvalues = 0;
597 
598   PetscFunctionReturn(0);
599 }
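
/*
   The two assembly routines above are driven by the standard pattern:

     ... MatSetValues()/MatSetValuesBlocked() calls, possibly off-process ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin() launches the stash scatters that ship off-process
   entries to their owners; MatAssemblyEnd() drains the messages with
   MatStashScatterGetMesg_Private() and re-applies them locally row by row,
   as coded above.
*/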
600 
601 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
602 #undef __FUNCT__
603 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
604 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
605 {
606   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
607   PetscErrorCode    ierr;
608   PetscInt          bs = mat->rmap.bs;
609   PetscMPIInt       size = baij->size,rank = baij->rank;
610   PetscTruth        iascii,isdraw;
611   PetscViewer       sviewer;
612   PetscViewerFormat format;
613 
614   PetscFunctionBegin;
615   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
616   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
617   if (iascii) {
618     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
619     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
620       MatInfo info;
621       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
622       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
623       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
624               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
625               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
626       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
627       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
628       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
629       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
630       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
631       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
632       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
633       PetscFunctionReturn(0);
634     } else if (format == PETSC_VIEWER_ASCII_INFO) {
635       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
636       PetscFunctionReturn(0);
637     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
638       PetscFunctionReturn(0);
639     }
640   }
641 
642   if (isdraw) {
643     PetscDraw  draw;
644     PetscTruth isnull;
645     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
646     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
647   }
648 
649   if (size == 1) {
650     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
651     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
652   } else {
653     /* assemble the entire matrix onto first processor. */
654     Mat          A;
655     Mat_SeqSBAIJ *Aloc;
656     Mat_SeqBAIJ  *Bloc;
657     PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
658     MatScalar    *a;
659 
660     /* Should this be the same type as mat? */
661     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
662     if (!rank) {
663       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
664     } else {
665       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
666     }
667     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
668     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
669     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
670 
671     /* copy over the A part */
672     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
673     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
674     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
675 
676     for (i=0; i<mbs; i++) {
677       rvals[0] = bs*(baij->rstartbs + i);
678       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
679       for (j=ai[i]; j<ai[i+1]; j++) {
680         col = (baij->cstartbs+aj[j])*bs;
681         for (k=0; k<bs; k++) {
682           ierr = MatSetValues_MPISBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
683           col++; a += bs;
684         }
685       }
686     }
687     /* copy over the B part */
688     Bloc = (Mat_SeqBAIJ*)baij->B->data;
689     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
690     for (i=0; i<mbs; i++) {
691 
692       rvals[0] = bs*(baij->rstartbs + i);
693       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
694       for (j=ai[i]; j<ai[i+1]; j++) {
695         col = baij->garray[aj[j]]*bs;
696         for (k=0; k<bs; k++) {
697           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
698           col++; a += bs;
699         }
700       }
701     }
702     ierr = PetscFree(rvals);CHKERRQ(ierr);
703     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
704     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
705     /*
706        Everyone has to make this call to draw the matrix since the graphics waits
707        are synchronized across all processes that share the PetscDraw object
708     */
709     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
710     if (!rank) {
711       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
712       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
713     }
714     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
715     ierr = MatDestroy(A);CHKERRQ(ierr);
716   }
717   PetscFunctionReturn(0);
718 }
719 
720 #undef __FUNCT__
721 #define __FUNCT__ "MatView_MPISBAIJ"
722 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
723 {
724   PetscErrorCode ierr;
725   PetscTruth     iascii,isdraw,issocket,isbinary;
726 
727   PetscFunctionBegin;
728   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
729   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
730   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
731   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
732   if (iascii || isdraw || issocket || isbinary) {
733     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
734   } else {
735     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
736   }
737   PetscFunctionReturn(0);
738 }
739 
740 #undef __FUNCT__
741 #define __FUNCT__ "MatDestroy_MPISBAIJ"
742 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
743 {
744   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
745   PetscErrorCode ierr;
746 
747   PetscFunctionBegin;
748 #if defined(PETSC_USE_LOG)
749   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
750 #endif
751   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
752   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
753   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
754   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
755 #if defined (PETSC_USE_CTABLE)
756   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
757 #else
758   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
759 #endif
760   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
761   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
762   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
763   if (baij->slvec0) {
764     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
765     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
766   }
767   if (baij->slvec1) {
768     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
769     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
770     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
771   }
772   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
773   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
774   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
775   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
776 #if defined(PETSC_USE_MAT_SINGLE)
777   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
778 #endif
779   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
780   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
781   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
782   ierr = PetscFree(baij);CHKERRQ(ierr);
783 
784   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
785   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
786   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
787   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
788   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
789   PetscFunctionReturn(0);
790 }
791 
792 #undef __FUNCT__
793 #define __FUNCT__ "MatMult_MPISBAIJ"
794 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
795 {
796   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
797   PetscErrorCode ierr;
798   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
799   PetscScalar    *x,*from,zero=0.0;
800 
801   PetscFunctionBegin;
802   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
803   if (nt != A->cmap.n) {
804     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
805   }
806 
807   /* diagonal part */
808   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
809   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
810 
811   /* subdiagonal part */
812   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
813 
814   /* copy x into the vec slvec0 */
815   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
816   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
817 
818   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
819   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
820   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
821 
822   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
823   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
824   /* superdiagonal part */
825   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
826   PetscFunctionReturn(0);
827 }
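
/*
   Sketch of the scheme used above: with the local splitting into the
   block-diagonal SeqSBAIJ part A and the strictly off-diagonal part U (a->B),
   the symmetric product y = M x is formed as

       y_loc = A x_loc + U x_ghost + (contributions of U^T x_loc
                                      scattered back to their owner rows)

   slvec0 packs [x_loc | U^T x_loc]; the single ADD_VALUES scatter into slvec1
   simultaneously gathers the ghost values of x into slvec1b and accumulates
   the remote U^T contributions into slvec1a, so only one communication phase
   is needed per product.
*/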
828 
829 #undef __FUNCT__
830 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
831 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
832 {
833   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
834   PetscErrorCode ierr;
835   PetscInt       nt;
836 
837   PetscFunctionBegin;
838   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
839   if (nt != A->cmap.n) {
840     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
841   }
842   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
843   if (nt != A->rmap.n) {
844     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
845   }
846 
847   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
848   /* do diagonal part */
849   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
850   /* do superdiagonal part */
851   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
852   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
853   /* do subdiagonal part */
854   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
855   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
856   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
857 
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
863 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
864 {
865   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
866   PetscErrorCode ierr;
867   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
868   PetscScalar    *x,*from,zero=0.0;
869 
870   PetscFunctionBegin;
871   /*
872   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
873   PetscSynchronizedFlush(((PetscObject)A)->comm);
874   */
875   /* diagonal part */
876   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
877   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
878 
879   /* subdiagonal part */
880   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
881 
882   /* copy x into the vec slvec0 */
883   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
884   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
885   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
886   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
887 
888   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
889   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
890   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
891 
892   /* superdiagonal part */
893   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
894 
895   PetscFunctionReturn(0);
896 }
897 
898 #undef __FUNCT__
899 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
900 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
901 {
902   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
903   PetscErrorCode ierr;
904 
905   PetscFunctionBegin;
906   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
907   /* do diagonal part */
908   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
909   /* do superdiagonal part */
910   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
911   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
912 
913   /* do subdiagonal part */
914   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
915   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
916   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
917 
918   PetscFunctionReturn(0);
919 }
920 
921 /*
922   This only works correctly for square matrices where the local subblock a->A
923   is the diagonal block
924 */
925 #undef __FUNCT__
926 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
927 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
928 {
929   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
930   PetscErrorCode ierr;
931 
932   PetscFunctionBegin;
933   /* if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where a->A is the diagonal block"); */
934   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
935   PetscFunctionReturn(0);
936 }
937 
938 #undef __FUNCT__
939 #define __FUNCT__ "MatScale_MPISBAIJ"
940 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
941 {
942   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
943   PetscErrorCode ierr;
944 
945   PetscFunctionBegin;
946   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
947   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatGetRow_MPISBAIJ"
953 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
954 {
955   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
956   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
957   PetscErrorCode ierr;
958   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
959   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
960   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
961 
962   PetscFunctionBegin;
963   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
964   mat->getrowactive = PETSC_TRUE;
965 
966   if (!mat->rowvalues && (idx || v)) {
967     /*
968         allocate enough space to hold information from the longest row.
969     */
970     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
971     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
972     PetscInt     max = 1,mbs = mat->mbs,tmp;
973     for (i=0; i<mbs; i++) {
974       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
975       if (max < tmp) { max = tmp; }
976     }
977     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
978     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
979   }
980 
981   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
982   lrow = row - brstart;  /* local row index */
983 
984   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
985   if (!v)   {pvA = 0; pvB = 0;}
986   if (!idx) {pcA = 0; if (!v) pcB = 0;}
987   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
988   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
989   nztot = nzA + nzB;
990 
991   cmap  = mat->garray;
992   if (v  || idx) {
993     if (nztot) {
994       /* Sort by increasing column numbers, assuming A and B already sorted */
995       PetscInt imark = -1;
996       if (v) {
997         *v = v_p = mat->rowvalues;
998         for (i=0; i<nzB; i++) {
999           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1000           else break;
1001         }
1002         imark = i;
1003         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1004         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1005       }
1006       if (idx) {
1007         *idx = idx_p = mat->rowindices;
1008         if (imark > -1) {
1009           for (i=0; i<imark; i++) {
1010             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1011           }
1012         } else {
1013           for (i=0; i<nzB; i++) {
1014             if (cmap[cworkB[i]/bs] < cstart)
1015               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1016             else break;
1017           }
1018           imark = i;
1019         }
1020         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1021         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1022       }
1023     } else {
1024       if (idx) *idx = 0;
1025       if (v)   *v   = 0;
1026     }
1027   }
1028   *nz = nztot;
1029   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1030   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #undef __FUNCT__
1035 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1036 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1037 {
1038   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1039 
1040   PetscFunctionBegin;
1041   if (!baij->getrowactive) {
1042     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1043   }
1044   baij->getrowactive = PETSC_FALSE;
1045   PetscFunctionReturn(0);
1046 }
1047 
1048 #undef __FUNCT__
1049 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1050 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1051 {
1052   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1053   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1054 
1055   PetscFunctionBegin;
1056   aA->getrow_utriangular = PETSC_TRUE;
1057   PetscFunctionReturn(0);
1058 }
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1061 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1062 {
1063   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1064   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1065 
1066   PetscFunctionBegin;
1067   aA->getrow_utriangular = PETSC_FALSE;
1068   PetscFunctionReturn(0);
1069 }
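
/*
   Usage sketch for row access on an SBAIJ matrix (only the upper triangular
   part of each row is stored and returned):

     PetscInt    nz,*cols;
     PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ... use nz, cols[], vals[] ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/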
1070 
1071 #undef __FUNCT__
1072 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1073 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1074 {
1075   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1076   PetscErrorCode ierr;
1077 
1078   PetscFunctionBegin;
1079   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1080   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1081   PetscFunctionReturn(0);
1082 }
1083 
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1086 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1087 {
1088   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1089   PetscErrorCode ierr;
1090 
1091   PetscFunctionBegin;
1092   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1093   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1099 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1100 {
1101   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1106   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1112 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1113 {
1114   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1115   Mat            A = a->A,B = a->B;
1116   PetscErrorCode ierr;
1117   PetscReal      isend[5],irecv[5];
1118 
1119   PetscFunctionBegin;
1120   info->block_size     = (PetscReal)matin->rmap.bs;
1121   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1122   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1123   isend[3] = info->memory;  isend[4] = info->mallocs;
1124   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1125   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1126   isend[3] += info->memory;  isend[4] += info->mallocs;
1127   if (flag == MAT_LOCAL) {
1128     info->nz_used      = isend[0];
1129     info->nz_allocated = isend[1];
1130     info->nz_unneeded  = isend[2];
1131     info->memory       = isend[3];
1132     info->mallocs      = isend[4];
1133   } else if (flag == MAT_GLOBAL_MAX) {
1134     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1135     info->nz_used      = irecv[0];
1136     info->nz_allocated = irecv[1];
1137     info->nz_unneeded  = irecv[2];
1138     info->memory       = irecv[3];
1139     info->mallocs      = irecv[4];
1140   } else if (flag == MAT_GLOBAL_SUM) {
1141     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1142     info->nz_used      = irecv[0];
1143     info->nz_allocated = irecv[1];
1144     info->nz_unneeded  = irecv[2];
1145     info->memory       = irecv[3];
1146     info->mallocs      = irecv[4];
1147   } else {
1148     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1149   }
1150   info->rows_global       = (PetscReal)matin->rmap.N;
1151   info->columns_global    = (PetscReal)matin->cmap.N;
1152   info->rows_local        = (PetscReal)matin->rmap.n;
1153   info->columns_local     = (PetscReal)matin->cmap.n;
1154   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1155   info->fill_ratio_needed = 0;
1156   info->factor_mallocs    = 0;
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1162 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1163 {
1164   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1165   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   switch (op) {
1170   case MAT_NEW_NONZERO_LOCATIONS:
1171   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1172   case MAT_KEEP_ZEROED_ROWS:
1173   case MAT_NEW_NONZERO_LOCATION_ERR:
1174     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1175     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1176     break;
1177   case MAT_ROW_ORIENTED:
1178     a->roworiented = flg;
1179     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1180     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1181     break;
1182   case MAT_NEW_DIAGONALS:
1183     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1184     break;
1185   case MAT_IGNORE_OFF_PROC_ENTRIES:
1186     a->donotstash = flg;
1187     break;
1188   case MAT_USE_HASH_TABLE:
1189     a->ht_flag = flg;
1190     break;
1191   case MAT_HERMITIAN:
1192     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric, not Hermitian"); /* else fall through to the symmetric cases */
1193   case MAT_SYMMETRIC:
1194   case MAT_STRUCTURALLY_SYMMETRIC:
1195   case MAT_SYMMETRY_ETERNAL:
1196     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1197     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1198     break;
1199   case MAT_IGNORE_LOWER_TRIANGULAR:
1200     aA->ignore_ltriangular = flg;
1201     break;
1202   case MAT_ERROR_LOWER_TRIANGULAR:
1203     aA->ignore_ltriangular = (PetscTruth)!flg; /* flg true => error (rather than ignore) on lower triangular input */
1204     break;
1205   case MAT_GETROW_UPPERTRIANGULAR:
1206     aA->getrow_utriangular = flg;
1207     break;
1208   default:
1209     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1210   }
1211   PetscFunctionReturn(0);
1212 }
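
/*
   Usage sketch: to have lower triangular input dropped silently instead of
   generating an error in MatSetValues_MPISBAIJ(), set

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);

   or run with -mat_ignore_lower_triangular.
*/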
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1216 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,MatReuse reuse,Mat *B)
1217 {
1218   PetscErrorCode ierr;
1219   PetscFunctionBegin;
1220   if (reuse == MAT_INITIAL_MATRIX || *B != A) {
1221     ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1222   }
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1228 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1229 {
1230   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1231   Mat            a=baij->A, b=baij->B;
1232   PetscErrorCode ierr;
1233   PetscInt       nv,m,n;
1234   PetscTruth     flg;
1235 
1236   PetscFunctionBegin;
1237   if (ll != rr){
1238     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1239     if (!flg)
1240       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1241   }
1242   if (!ll) PetscFunctionReturn(0);
1243 
1244   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1245   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1246 
1247   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1248   if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right scaling vectors have non-conforming local size");
1249 
1250   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1251 
1252   /* left diagonalscale the off-diagonal part */
1253   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1254 
1255   /* scale the diagonal part */
1256   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1257 
1258   /* right diagonalscale the off-diagonal part */
1259   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1260   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
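
/*
   Note: only the symmetric scaling M <- D M D (ll == rr) is supported above,
   since distinct left and right scalings would destroy the symmetry the SBAIJ
   storage assumes.  Typical call, with d a vector matching the local row
   layout of mat:

     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/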
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1266 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1267 {
1268   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1269   PetscErrorCode ierr;
1270 
1271   PetscFunctionBegin;
1272   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatEqual_MPISBAIJ"
1280 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1281 {
1282   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1283   Mat            a,b,c,d;
1284   PetscTruth     flg;
1285   PetscErrorCode ierr;
1286 
1287   PetscFunctionBegin;
1288   a = matA->A; b = matA->B;
1289   c = matB->A; d = matB->B;
1290 
1291   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1292   if (flg) {
1293     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1294   }
1295   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatCopy_MPISBAIJ"
1301 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1302 {
1303   PetscErrorCode ierr;
1304   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1305   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1306 
1307   PetscFunctionBegin;
1308   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1309   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1310     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1311     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1312     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1313   } else {
1314     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1315     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1316   }
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1322 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1323 {
1324   PetscErrorCode ierr;
1325 
1326   PetscFunctionBegin;
1327   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 #include "petscblaslapack.h"
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1334 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1335 {
1336   PetscErrorCode ierr;
1337   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1338   PetscBLASInt   bnz,one=1;
1339   Mat_SeqSBAIJ   *xa,*ya;
1340   Mat_SeqBAIJ    *xb,*yb;
1341 
1342   PetscFunctionBegin;
1343   if (str == SAME_NONZERO_PATTERN) {
1344     PetscScalar alpha = a;
1345     xa = (Mat_SeqSBAIJ *)xx->A->data;
1346     ya = (Mat_SeqSBAIJ *)yy->A->data;
1347     bnz = PetscBLASIntCast(xa->nz);
1348     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1349     xb = (Mat_SeqBAIJ *)xx->B->data;
1350     yb = (Mat_SeqBAIJ *)yy->B->data;
1351     bnz = PetscBLASIntCast(xb->nz);
1352     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1353   } else {
1354     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1355     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1356     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1357   }
1358   PetscFunctionReturn(0);
1359 }
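
/*
   Usage sketch: Y <- a*X + Y.  With SAME_NONZERO_PATTERN the update above
   reduces to one BLAS axpy over each stored array:

     ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/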
1360 
1361 #undef __FUNCT__
1362 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1363 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1364 {
1365   PetscErrorCode ierr;
1366   PetscInt       i;
1367   PetscTruth     flg;
1368 
1369   PetscFunctionBegin;
1370   for (i=0; i<n; i++) {
1371     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1372     if (!flg) {
1373       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1374     }
1375   }
1376   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 
1381 /* -------------------------------------------------------------------*/
1382 static struct _MatOps MatOps_Values = {
1383        MatSetValues_MPISBAIJ,
1384        MatGetRow_MPISBAIJ,
1385        MatRestoreRow_MPISBAIJ,
1386        MatMult_MPISBAIJ,
1387 /* 4*/ MatMultAdd_MPISBAIJ,
1388        MatMult_MPISBAIJ,       /* transpose versions are the same as the non-transpose ones since the matrix is symmetric */
1389        MatMultAdd_MPISBAIJ,
1390        0,
1391        0,
1392        0,
1393 /*10*/ 0,
1394        0,
1395        0,
1396        MatRelax_MPISBAIJ,
1397        MatTranspose_MPISBAIJ,
1398 /*15*/ MatGetInfo_MPISBAIJ,
1399        MatEqual_MPISBAIJ,
1400        MatGetDiagonal_MPISBAIJ,
1401        MatDiagonalScale_MPISBAIJ,
1402        MatNorm_MPISBAIJ,
1403 /*20*/ MatAssemblyBegin_MPISBAIJ,
1404        MatAssemblyEnd_MPISBAIJ,
1405        0,
1406        MatSetOption_MPISBAIJ,
1407        MatZeroEntries_MPISBAIJ,
1408 /*25*/ 0,
1409        0,
1410        0,
1411        0,
1412        0,
1413 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1414        0,
1415        0,
1416        0,
1417        0,
1418 /*35*/ MatDuplicate_MPISBAIJ,
1419        0,
1420        0,
1421        0,
1422        0,
1423 /*40*/ MatAXPY_MPISBAIJ,
1424        MatGetSubMatrices_MPISBAIJ,
1425        MatIncreaseOverlap_MPISBAIJ,
1426        MatGetValues_MPISBAIJ,
1427        MatCopy_MPISBAIJ,
1428 /*45*/ 0,
1429        MatScale_MPISBAIJ,
1430        0,
1431        0,
1432        0,
1433 /*50*/ 0,
1434        0,
1435        0,
1436        0,
1437        0,
1438 /*55*/ 0,
1439        0,
1440        MatSetUnfactored_MPISBAIJ,
1441        0,
1442        MatSetValuesBlocked_MPISBAIJ,
1443 /*60*/ 0,
1444        0,
1445        0,
1446        0,
1447        0,
1448 /*65*/ 0,
1449        0,
1450        0,
1451        0,
1452        0,
1453 /*70*/ MatGetRowMaxAbs_MPISBAIJ,
1454        0,
1455        0,
1456        0,
1457        0,
1458 /*75*/ 0,
1459        0,
1460        0,
1461        0,
1462        0,
1463 /*80*/ 0,
1464        0,
1465        0,
1466        0,
1467        MatLoad_MPISBAIJ,
1468 /*85*/ 0,
1469        0,
1470        0,
1471        0,
1472        0,
1473 /*90*/ 0,
1474        0,
1475        0,
1476        0,
1477        0,
1478 /*95*/ 0,
1479        0,
1480        0,
1481        0,
1482        0,
1483 /*100*/0,
1484        0,
1485        0,
1486        0,
1487        0,
1488 /*105*/0,
1489        MatRealPart_MPISBAIJ,
1490        MatImaginaryPart_MPISBAIJ,
1491        MatGetRowUpperTriangular_MPISBAIJ,
1492        MatRestoreRowUpperTriangular_MPISBAIJ
1493 };
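
/*
   Note (illustrative, not from this source): the slots above are invoked through the
   generic Mat interface, e.g. a user-level MatMult(A,x,y) dispatches as
.vb
      ierr = (*A->ops->mult)(A,x,y);    here MatMult_MPISBAIJ()
.ve
   and the zero entries mark operations this format does not implement.
*/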
1494 
1495 
1496 EXTERN_C_BEGIN
1497 #undef __FUNCT__
1498 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1499 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1500 {
1501   PetscFunctionBegin;
1502   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1503   *iscopy = PETSC_FALSE;
1504   PetscFunctionReturn(0);
1505 }
1506 EXTERN_C_END
1507 
1508 EXTERN_C_BEGIN
1509 #undef __FUNCT__
1510 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1511 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1512 {
1513   Mat_MPISBAIJ   *b;
1514   PetscErrorCode ierr;
1515   PetscInt       i,mbs,Mbs;
1516 
1517   PetscFunctionBegin;
1518   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1519     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
1520   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1521 
1522   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1523   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1524   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1525   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1526   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1527 
1528   B->rmap.bs = B->cmap.bs = bs;
1529   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1530   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1531 
1532   if (d_nnz) {
1533     for (i=0; i<B->rmap.n/bs; i++) {
1534       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1535     }
1536   }
1537   if (o_nnz) {
1538     for (i=0; i<B->rmap.n/bs; i++) {
1539       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1540     }
1541   }
1542   B->preallocated = PETSC_TRUE;
1543 
1544   b   = (Mat_MPISBAIJ*)B->data;
1545   mbs = B->rmap.n/bs;
1546   Mbs = B->rmap.N/bs;
1547   if (mbs*bs != B->rmap.n) {
1548     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
1549   }
1550 
1551   B->rmap.bs  = bs;
1552   b->bs2 = bs*bs;
1553   b->mbs = mbs;
1554   b->nbs = mbs;
1555   b->Mbs = Mbs;
1556   b->Nbs = Mbs;
1557 
1558   for (i=0; i<=b->size; i++) {
1559     b->rangebs[i] = B->rmap.range[i]/bs;
1560   }
1561   b->rstartbs = B->rmap.rstart/bs;
1562   b->rendbs   = B->rmap.rend/bs;
1563 
1564   b->cstartbs = B->cmap.rstart/bs;
1565   b->cendbs   = B->cmap.rend/bs;
1566 
1567   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1568   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1569   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1570   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1571   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1572 
1573   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1574   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1575   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1576   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1577   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1578 
1579   /* build stash for off-processor block entries generated during assembly */
1580   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1581 
1582   PetscFunctionReturn(0);
1583 }
1584 EXTERN_C_END
1585 
1586 EXTERN_C_BEGIN
1587 extern PetscErrorCode MatGetFactor_mpisbaij_mumps(Mat,MatFactorType,Mat*);
1588 extern PetscErrorCode PETSCMAT_DLLEXPORT MatGetFactor_mpisbaij_spooles(Mat,MatFactorType,Mat*);
1589 EXTERN_C_END
1590 
1591 /*MC
1592    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1593    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1594 
1595    Options Database Keys:
1596 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1597 
1598   Level: beginner
1599 
1600 .seealso: MatCreateMPISBAIJ
1601 M*/
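
/*
   Illustrative usage sketch (hypothetical sizes m,n,M,N): the type is typically
   selected at runtime with -mat_type mpisbaij after
.vb
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve
*/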
1602 
1603 EXTERN_C_BEGIN
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatCreate_MPISBAIJ"
1606 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1607 {
1608   Mat_MPISBAIJ   *b;
1609   PetscErrorCode ierr;
1610   PetscTruth     flg;
1611 
1612   PetscFunctionBegin;
1613 
1614   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1615   B->data = (void*)b;
1616   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1617 
1618   B->ops->destroy    = MatDestroy_MPISBAIJ;
1619   B->ops->view       = MatView_MPISBAIJ;
1620   B->mapping         = 0;
1621   B->assembled       = PETSC_FALSE;
1622 
1623   B->insertmode = NOT_SET_VALUES;
1624   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1625   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1626 
1627   /* build local table of row and column ownerships */
1628   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1629 
1630   /* build stash for off-processor entries generated during assembly */
1631   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1632   b->donotstash  = PETSC_FALSE;
1633   b->colmap      = PETSC_NULL;
1634   b->garray      = PETSC_NULL;
1635   b->roworiented = PETSC_TRUE;
1636 
1637   /* stuff used in block assembly */
1638   b->barray       = 0;
1639 
1640   /* stuff used for matrix vector multiply */
1641   b->lvec         = 0;
1642   b->Mvctx        = 0;
1643   b->slvec0       = 0;
1644   b->slvec0b      = 0;
1645   b->slvec1       = 0;
1646   b->slvec1a      = 0;
1647   b->slvec1b      = 0;
1648   b->sMvctx       = 0;
1649 
1650   /* stuff for MatGetRow() */
1651   b->rowindices   = 0;
1652   b->rowvalues    = 0;
1653   b->getrowactive = PETSC_FALSE;
1654 
1655   /* hash table stuff */
1656   b->ht           = 0;
1657   b->hd           = 0;
1658   b->ht_size      = 0;
1659   b->ht_flag      = PETSC_FALSE;
1660   b->ht_fact      = 0;
1661   b->ht_total_ct  = 0;
1662   b->ht_insert_ct = 0;
1663 
1664   b->in_loc       = 0;
1665   b->v_loc        = 0;
1666   b->n_loc        = 0;
1667   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1668     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1669     if (flg) {
1670       PetscReal fact = 1.39;
1671       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1672       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1673       if (fact <= 1.0) fact = 1.39;
1674       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1675       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1676     }
1677   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1678 
1679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpisbaij_mumps_C",
1680                                      "MatGetFactor_mpisbaij_mumps",
1681                                      MatGetFactor_mpisbaij_mumps);CHKERRQ(ierr);
1682   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetFactor_mpisbaij_spooles_C",
1683                                      "MatGetFactor_mpisbaij_spooles",
1684                                      MatGetFactor_mpisbaij_spooles);CHKERRQ(ierr);
1685   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1686                                      "MatStoreValues_MPISBAIJ",
1687                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1688   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1689                                      "MatRetrieveValues_MPISBAIJ",
1690                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1691   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1692                                      "MatGetDiagonalBlock_MPISBAIJ",
1693                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1694   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1695                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1696                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1697   B->symmetric                  = PETSC_TRUE;
1698   B->structurally_symmetric     = PETSC_TRUE;
1699   B->symmetric_set              = PETSC_TRUE;
1700   B->structurally_symmetric_set = PETSC_TRUE;
1701   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1702   PetscFunctionReturn(0);
1703 }
1704 EXTERN_C_END
1705 
1706 /*MC
1707    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1708 
1709    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1710    and MATMPISBAIJ otherwise.
1711 
1712    Options Database Keys:
1713 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1714 
1715   Level: beginner
1716 
1717 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1718 M*/
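
/*
   Illustrative sketch (hypothetical comm and sizes): requesting MATSBAIJ dispatches
   on the communicator size (see MatCreate_SBAIJ() below), so the same code runs
   sequentially and in parallel.
.vb
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
      ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);    becomes MATSEQSBAIJ on one process
.ve
*/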
1719 
1720 EXTERN_C_BEGIN
1721 #undef __FUNCT__
1722 #define __FUNCT__ "MatCreate_SBAIJ"
1723 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1724 {
1725   PetscErrorCode ierr;
1726   PetscMPIInt    size;
1727 
1728   PetscFunctionBegin;
1729   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1730   if (size == 1) {
1731     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1732   } else {
1733     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1734   }
1735   PetscFunctionReturn(0);
1736 }
1737 EXTERN_C_END
1738 
1739 #undef __FUNCT__
1740 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1741 /*@C
1742    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1743    the user should preallocate the matrix storage by setting the parameters
1744    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1745    performance can be increased by more than a factor of 50.
1746 
1747    Collective on Mat
1748 
1749    Input Parameters:
1750 +  A - the matrix
1751 .  bs   - size of block
1752 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1753            submatrix  (same for all local rows)
1754 .  d_nnz - array containing the number of block nonzeros in the various block rows
1755            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1756            (possibly different for each block row) or PETSC_NULL.  You must leave room
1757            for the diagonal entry even if it is zero.
1758 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1759            submatrix (same for all local rows).
1760 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1761            off-diagonal portion of the local submatrix (possibly different for
1762            each block row) or PETSC_NULL.
1763 
1764 
1765    Options Database Keys:
1766 +   -mat_no_unroll - uses code that does not unroll the loops in the
1767                      block calculations (much slower)
1768 -   -mat_block_size - size of the blocks to use
1769 
1770    Notes:
1771 
1772    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1773    then it must be used on all processors that share the object for that argument.
1774 
1775    If the *_nnz parameter is given then the *_nz parameter is ignored
1776 
1777    Storage Information:
1778    For a square global matrix we define each processor's diagonal portion
1779    to be its local rows and the corresponding columns (a square submatrix);
1780    each processor's off-diagonal portion encompasses the remainder of the
1781    local matrix (a rectangular submatrix).
1782 
1783    The user can specify preallocated storage for the diagonal part of
1784    the local submatrix with either d_nz or d_nnz (not both).  Set
1785    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1786    memory allocation.  Likewise, specify preallocated storage for the
1787    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1788 
1789    You can call MatGetInfo() to get information on how effective the preallocation was;
1790    for example the fields mallocs,nz_allocated,nz_used,nz_unneeded;
1791    You can also run with the option -info and look for messages with the string
1792    malloc in them to see if additional memory allocation was needed.
1793 
1794    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1795    the figure below we depict these three local rows and all columns (0-11).
1796 
1797 .vb
1798            0 1 2 3 4 5 6 7 8 9 10 11
1799           -------------------
1800    row 3  |  o o o d d d o o o o o o
1801    row 4  |  o o o d d d o o o o o o
1802    row 5  |  o o o d d d o o o o o o
1803           -------------------
1804 .ve
1805 
1806    Thus, any entries in the d locations are stored in the d (diagonal)
1807    submatrix, and any entries in the o locations are stored in the
1808    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1809    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1810 
1811    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1812    plus the diagonal part of the d matrix,
1813    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1814    In general, for PDE problems in which most nonzeros are near the diagonal,
1815    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1816    or you will get TERRIBLE performance; see the users' manual chapter on
1817    matrices.
1818 
1819    Level: intermediate
1820 
1821 .keywords: matrix, block, aij, compressed row, sparse, parallel
1822 
1823 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1824 @*/
1825 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1826 {
1827   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1828 
1829   PetscFunctionBegin;
1830   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1831   if (f) {
1832     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1833   }
1834   PetscFunctionReturn(0);
1835 }
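
/*
   Worked example (illustrative sketch, not from this source; assumes two processes,
   bs=1 and a global 8x8 tridiagonal matrix; the counts shown are those passed on the
   first process, which owns rows 0-3): only the upper triangle is stored, so each
   local row counts its diagonal plus its superdiagonal; row 3's superdiagonal
   neighbour (global column 4) is owned by the other process, hence counted in o_nnz.
.vb
      Mat      A;
      PetscInt d_nnz[4] = {2,2,2,1},o_nnz[4] = {0,0,0,1};
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,4,4,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
*/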
1836 
1837 #undef __FUNCT__
1838 #define __FUNCT__ "MatCreateMPISBAIJ"
1839 /*@C
1840    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1841    (block compressed row).  For good matrix assembly performance
1842    the user should preallocate the matrix storage by setting the parameters
1843    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1844    performance can be increased by more than a factor of 50.
1845 
1846    Collective on MPI_Comm
1847 
1848    Input Parameters:
1849 +  comm - MPI communicator
1850 .  bs   - size of block
1851 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1852            This value should be the same as the local size used in creating the
1853            y vector for the matrix-vector product y = Ax.
1854 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1855            This value should be the same as the local size used in creating the
1856            x vector for the matrix-vector product y = Ax.
1857 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1858 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1859 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1860            submatrix  (same for all local rows)
1861 .  d_nnz - array containing the number of block nonzeros in the various block rows
1862            in the upper triangular and diagonal part of the diagonal portion of the local
1863            submatrix (possibly different for each block row) or PETSC_NULL.
1864            You must leave room for the diagonal entry even if it is zero.
1865 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1866            submatrix (same for all local rows).
1867 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1868            off-diagonal portion of the local submatrix (possibly different for
1869            each block row) or PETSC_NULL.
1870 
1871    Output Parameter:
1872 .  A - the matrix
1873 
1874    Options Database Keys:
1875 +   -mat_no_unroll - uses code that does not unroll the loops in the
1876                      block calculations (much slower)
1877 .   -mat_block_size - size of the blocks to use
1878 -   -mat_mpi - use the parallel matrix data structures even on one processor
1879                (defaults to using SeqSBAIJ format on one processor)
1880 
1881    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1882    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
1883    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
1884    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1885 
1886    Notes:
1887    The number of rows and columns must be divisible by blocksize.
1888    This matrix type does not support complex Hermitian operations.
1889 
1890    The user MUST specify either the local or global matrix dimensions
1891    (possibly both).
1892 
1893    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1894    then it must be used on all processors that share the object for that argument.
1895 
1896    If the *_nnz parameter is given then the *_nz parameter is ignored
1897 
1898    Storage Information:
1899    For a square global matrix we define each processor's diagonal portion
1900    to be its local rows and the corresponding columns (a square submatrix);
1901    each processor's off-diagonal portion encompasses the remainder of the
1902    local matrix (a rectangular submatrix).
1903 
1904    The user can specify preallocated storage for the diagonal part of
1905    the local submatrix with either d_nz or d_nnz (not both).  Set
1906    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1907    memory allocation.  Likewise, specify preallocated storage for the
1908    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1909 
1910    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1911    the figure below we depict these three local rows and all columns (0-11).
1912 
1913 .vb
1914            0 1 2 3 4 5 6 7 8 9 10 11
1915           -------------------
1916    row 3  |  o o o d d d o o o o o o
1917    row 4  |  o o o d d d o o o o o o
1918    row 5  |  o o o d d d o o o o o o
1919           -------------------
1920 .ve
1921 
1922    Thus, any entries in the d locations are stored in the d (diagonal)
1923    submatrix, and any entries in the o locations are stored in the
1924    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1925    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1926 
1927    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1928    plus the diagonal part of the d matrix,
1929    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1930    In general, for PDE problems in which most nonzeros are near the diagonal,
1931    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1932    or you will get TERRIBLE performance; see the users' manual chapter on
1933    matrices.
1934 
1935    Level: intermediate
1936 
1937 .keywords: matrix, block, aij, compressed row, sparse, parallel
1938 
1939 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1940 @*/
1941 
1942 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1943 {
1944   PetscErrorCode ierr;
1945   PetscMPIInt    size;
1946 
1947   PetscFunctionBegin;
1948   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1949   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1950   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1951   if (size > 1) {
1952     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1953     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1954   } else {
1955     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1956     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1957   }
1958   PetscFunctionReturn(0);
1959 }
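
/*
   Illustrative sketch (assumes bs=2, a 100x100 global matrix and constant
   per-block-row counts, so the d_nz/o_nz form suffices):
.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                               3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
*/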
1960 
1961 
1962 #undef __FUNCT__
1963 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1964 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1965 {
1966   Mat            mat;
1967   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1968   PetscErrorCode ierr;
1969   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
1970   PetscScalar    *array;
1971 
1972   PetscFunctionBegin;
1973   *newmat       = 0;
1974   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
1975   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
1976   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
1977   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1978   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
1979   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
1980 
1981   mat->factor       = matin->factor;
1982   mat->preallocated = PETSC_TRUE;
1983   mat->assembled    = PETSC_TRUE;
1984   mat->insertmode   = NOT_SET_VALUES;
1985 
1986   a = (Mat_MPISBAIJ*)mat->data;
1987   a->bs2   = oldmat->bs2;
1988   a->mbs   = oldmat->mbs;
1989   a->nbs   = oldmat->nbs;
1990   a->Mbs   = oldmat->Mbs;
1991   a->Nbs   = oldmat->Nbs;
1992 
1993 
1994   a->size         = oldmat->size;
1995   a->rank         = oldmat->rank;
1996   a->donotstash   = oldmat->donotstash;
1997   a->roworiented  = oldmat->roworiented;
1998   a->rowindices   = 0;
1999   a->rowvalues    = 0;
2000   a->getrowactive = PETSC_FALSE;
2001   a->barray       = 0;
2002   a->rstartbs    = oldmat->rstartbs;
2003   a->rendbs      = oldmat->rendbs;
2004   a->cstartbs    = oldmat->cstartbs;
2005   a->cendbs      = oldmat->cendbs;
2006 
2007   /* hash table stuff */
2008   a->ht           = 0;
2009   a->hd           = 0;
2010   a->ht_size      = 0;
2011   a->ht_flag      = oldmat->ht_flag;
2012   a->ht_fact      = oldmat->ht_fact;
2013   a->ht_total_ct  = 0;
2014   a->ht_insert_ct = 0;
2015 
2016   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2017   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2018   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2019   if (oldmat->colmap) {
2020 #if defined (PETSC_USE_CTABLE)
2021     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2022 #else
2023     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2024     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2025     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2026 #endif
2027   } else a->colmap = 0;
2028 
2029   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2030     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2031     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2032     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2033   } else a->garray = 0;
2034 
2035   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2036   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2037   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2038   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2039 
2040   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2041   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2042   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2043   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2044 
2045   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2046   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2047   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2048   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2049   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2050   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2051   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2052   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2053   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2054   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2055   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2056   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2057   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2058 
2059   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so instead share the old scatter by taking a reference: */
2060   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2061   a->sMvctx = oldmat->sMvctx;
2062   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2063 
2064   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2065   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2066   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2067   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2068   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2069   *newmat = mat;
2070   PetscFunctionReturn(0);
2071 }
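
/*
   Illustrative sketch (hypothetical assembled matrix A): duplicating with values,
   e.g. to keep a copy before modifying A in place; note that the duplicate shares
   the sMvctx scatter by reference (the lazy trick above) rather than copying it.
.vb
      Mat Acopy;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&Acopy);CHKERRQ(ierr);
.ve
*/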
2072 
2073 #include "petscsys.h"
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatLoad_MPISBAIJ"
2077 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2078 {
2079   Mat            A;
2080   PetscErrorCode ierr;
2081   PetscInt       i,nz,j,rstart,rend;
2082   PetscScalar    *vals,*buf;
2083   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2084   MPI_Status     status;
2085   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2086   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2087   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2088   PetscInt       bs=1,Mbs,mbs,extra_rows;
2089   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2090   PetscInt       dcount,kmax,k,nzcount,tmp;
2091   int            fd;
2092 
2093   PetscFunctionBegin;
2094   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2095     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2096   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2097 
2098   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2099   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2100   if (!rank) {
2101     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2102     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2103     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2104     if (header[3] < 0) {
2105       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2106     }
2107   }
2108 
2109   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2110   M = header[1]; N = header[2];
2111 
2112   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2113 
2114   /*
2115      This code adds extra rows to make sure the number of rows is
2116      divisible by the blocksize
2117   */
2118   Mbs        = M/bs;
2119   extra_rows = bs - M + bs*(Mbs);
2120   if (extra_rows == bs) extra_rows = 0;
2121   else                  Mbs++;
2122   if (extra_rows && !rank) {
2123     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2124   }
2125 
2126   /* determine ownership of all rows */
2127   mbs        = Mbs/size + ((Mbs % size) > rank);
2128   m          = mbs*bs;
2129   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2130   browners   = rowners + size + 1;
2131   mmbs       = PetscMPIIntCast(mbs);
2132   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2133   rowners[0] = 0;
2134   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2135   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2136   rstart = rowners[rank];
2137   rend   = rowners[rank+1];
2138 
2139   /* distribute row lengths to all processors */
2140   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2141   if (!rank) {
2142     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2143     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2144     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2145     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2146     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2147     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2148     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2149   } else {
2150     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2151   }
2152 
2153   if (!rank) {   /* procs[0] */
2154     /* calculate the number of nonzeros on each processor */
2155     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2156     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2157     for (i=0; i<size; i++) {
2158       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2159         procsnz[i] += rowlengths[j];
2160       }
2161     }
2162     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2163 
2164     /* determine max buffer needed and allocate it */
2165     maxnz = 0;
2166     for (i=0; i<size; i++) {
2167       maxnz = PetscMax(maxnz,procsnz[i]);
2168     }
2169     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2170 
2171     /* read in my part of the matrix column indices  */
2172     nz     = procsnz[0];
2173     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2174     mycols = ibuf;
2175     if (size == 1)  nz -= extra_rows;
2176     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2177     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2178 
2179     /* read in the column indices for the other processors (except the last one) and ship them off */
2180     for (i=1; i<size-1; i++) {
2181       nz   = procsnz[i];
2182       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2183       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2184     }
2185     /* read in the column indices for the last processor */
2186     if (size != 1) {
2187       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2188       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2189       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2190       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2191     }
2192     ierr = PetscFree(cols);CHKERRQ(ierr);
2193   } else {  /* procs[i], i>0 */
2194     /* determine buffer space needed for message */
2195     nz = 0;
2196     for (i=0; i<m; i++) {
2197       nz += locrowlens[i];
2198     }
2199     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2200     mycols = ibuf;
2201     /* receive message of column indices*/
2202     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2203     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2204     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2205   }
2206 
2207   /* loop over local rows, determining number of off diagonal entries */
2208   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2209   odlens   = dlens + (rend-rstart);
2210   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2211   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2212   masked1  = mask    + Mbs;
2213   masked2  = masked1 + Mbs;
2214   rowcount = 0; nzcount = 0;
2215   for (i=0; i<mbs; i++) {
2216     dcount  = 0;
2217     odcount = 0;
2218     for (j=0; j<bs; j++) {
2219       kmax = locrowlens[rowcount];
2220       for (k=0; k<kmax; k++) {
2221         tmp = mycols[nzcount++]/bs; /* block col. index */
2222         if (!mask[tmp]) {
2223           mask[tmp] = 1;
2224           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2225           else masked1[dcount++] = tmp; /* entry in diag portion */
2226         }
2227       }
2228       rowcount++;
2229     }
2230 
2231     dlens[i]  = dcount;  /* d_nzz[i] */
2232     odlens[i] = odcount; /* o_nzz[i] */
2233 
2234     /* zero out the mask elements we set */
2235     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2236     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2237   }
2238 
2239   /* create our matrix */
2240   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2241   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2242   ierr = MatSetType(A,type);CHKERRQ(ierr);
2243   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2244 
2245   if (!rank) {
2246     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2247     /* read in my part of the matrix numerical values  */
2248     nz = procsnz[0];
2249     vals = buf;
2250     mycols = ibuf;
2251     if (size == 1)  nz -= extra_rows;
2252     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2253     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2254 
2255     /* insert into matrix */
2256     jj      = rstart*bs;
2257     for (i=0; i<m; i++) {
2258       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2259       mycols += locrowlens[i];
2260       vals   += locrowlens[i];
2261       jj++;
2262     }
2263 
2264     /* read in the values for the other processors (except the last one) and ship them out */
2265     for (i=1; i<size-1; i++) {
2266       nz   = procsnz[i];
2267       vals = buf;
2268       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2269       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2270     }
2271     /* the last proc */
2272     if (size != 1){
2273       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2274       vals = buf;
2275       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2276       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2277       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2278     }
2279     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2280 
2281   } else {
2282     /* receive numeric values */
2283     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2284 
2285     /* receive message of values*/
2286     vals   = buf;
2287     mycols = ibuf;
2288     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2289     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2290     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2291 
2292     /* insert into matrix */
2293     jj      = rstart*bs;
2294     for (i=0; i<m; i++) {
2295       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2296       mycols += locrowlens[i];
2297       vals   += locrowlens[i];
2298       jj++;
2299     }
2300   }
2301 
2302   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2303   ierr = PetscFree(buf);CHKERRQ(ierr);
2304   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2305   ierr = PetscFree(rowners);CHKERRQ(ierr);
2306   ierr = PetscFree(dlens);CHKERRQ(ierr);
2307   ierr = PetscFree(mask);CHKERRQ(ierr);
2308   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2309   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2310   *newmat = A;
2311   PetscFunctionReturn(0);
2312 }
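
/*
   Illustrative sketch (hypothetical file name "matrix.dat"): loading a binary matrix
   written by MatView(); the block size is taken from -matload_block_size.
.vb
      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
.ve
*/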
2313 
2314 #undef __FUNCT__
2315 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2316 /*XXXXX@
2317    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2318 
2319    Input Parameters:
2320 .  mat  - the matrix
2321 .  fact - factor
2322 
2323    Collective on Mat
2324 
2325    Level: advanced
2326 
2327   Notes:
2328    This can also be set by the command line option: -mat_use_hash_table fact
2329 
2330 .keywords: matrix, hashtable, factor, HT
2331 
2332 .seealso: MatSetOption()
2333 @XXXXX*/
2334 
2335 
2336 #undef __FUNCT__
2337 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2338 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2339 {
2340   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2341   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2342   PetscReal      atmp;
2343   PetscReal      *work,*svalues,*rvalues;
2344   PetscErrorCode ierr;
2345   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2346   PetscMPIInt    rank,size;
2347   PetscInt       *rowners_bs,dest,count,source;
2348   PetscScalar    *va;
2349   MatScalar      *ba;
2350   MPI_Status     stat;
2351 
2352   PetscFunctionBegin;
2353   if (idx) SETERRQ(PETSC_ERR_SUP,"Returning the row index array idx is not supported; send email to petsc-maint@mcs.anl.gov");
2354   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2355   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2356 
2357   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2358   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2359 
2360   bs   = A->rmap.bs;
2361   mbs  = a->mbs;
2362   Mbs  = a->Mbs;
2363   ba   = b->a;
2364   bi   = b->i;
2365   bj   = b->j;
2366 
2367   /* find ownerships */
2368   rowners_bs = A->rmap.range;
2369 
2370   /* each proc creates an array to be distributed */
2371   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2372   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2373 
2374   /* row_max for B */
2375   if (rank != size-1){
2376     for (i=0; i<mbs; i++) {
2377       ncols = bi[1] - bi[0]; bi++;
2378       brow  = bs*i;
2379       for (j=0; j<ncols; j++){
2380         bcol = bs*(*bj);
2381         for (kcol=0; kcol<bs; kcol++){
2382           col = bcol + kcol;                 /* local col index */
2383           col += rowners_bs[rank+1];      /* global col index */
2384           for (krow=0; krow<bs; krow++){
2385             atmp = PetscAbsScalar(*ba); ba++;
2386             row = brow + krow;    /* local row index */
2387             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2388             if (work[col] < atmp) work[col] = atmp;
2389           }
2390         }
2391         bj++;
2392       }
2393     }
2394 
2395     /* send values to its owners */
2396     for (dest=rank+1; dest<size; dest++){
2397       svalues = work + rowners_bs[dest];
2398       count   = rowners_bs[dest+1]-rowners_bs[dest];
2399       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2400     }
2401   }
2402 
2403   /* receive values */
2404   if (rank){
2405     rvalues = work;
2406     count   = rowners_bs[rank+1]-rowners_bs[rank];
2407     for (source=0; source<rank; source++){
2408       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2409       /* process values */
2410       for (i=0; i<count; i++){
2411         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2412       }
2413     }
2414   }
2415 
2416   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2417   ierr = PetscFree(work);CHKERRQ(ierr);
2418   PetscFunctionReturn(0);
2419 }
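
/*
   Illustrative sketch (hypothetical matrix A): v must match the row layout of A, and
   idx must be PETSC_NULL since index tracking is unsupported above.
.vb
      Vec rmax;
      ierr = MatGetVecs(A,PETSC_NULL,&rmax);CHKERRQ(ierr);
      ierr = MatGetRowMaxAbs(A,rmax,PETSC_NULL);CHKERRQ(ierr);
.ve
*/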
2420 
2421 #undef __FUNCT__
2422 #define __FUNCT__ "MatRelax_MPISBAIJ"
2423 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2424 {
2425   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2426   PetscErrorCode ierr;
2427   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2428   PetscScalar    *x,*b,*ptr,zero=0.0;
2429   Vec            bb1;
2430 
2431   PetscFunctionBegin;
2432   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2433   if (bs > 1)
2434     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2435 
2436   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2437     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2438       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2439       its--;
2440     }
2441 
2442     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2443     while (its--){
2444 
2445       /* lower triangular part: slvec0b = - B^T*xx */
2446       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2447 
2448       /* copy xx into slvec0a */
2449       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2450       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2451       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2452       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2453 
2454       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2455 
2456       /* copy bb into slvec1a */
2457       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2458       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2459       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2460       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2461 
2462       /* set slvec1b = 0 */
2463       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2464 
2465       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2466       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2467       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2468       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2469 
2470       /* upper triangular part: bb1 = bb1 - B*x */
2471       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2472 
2473       /* local diagonal sweep */
2474       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2475     }
2476     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2477   } else {
2478     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2479   }
2480   PetscFunctionReturn(0);
2481 }
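
/*
   Illustrative sketch (hypothetical A, b, x): one symmetric local SOR sweep with
   omega=1.0 and a zero initial guess, the only MatSORType combination supported above.
.vb
      ierr = MatRelax(A,b,1.0,(MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                      0.0,1,1,x);CHKERRQ(ierr);
.ve
*/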
2482 
2483 #undef __FUNCT__
2484 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2485 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2486 {
2487   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2488   PetscErrorCode ierr;
2489   Vec            lvec1,bb1;
2490 
2491   PetscFunctionBegin;
2492   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2493   if (matin->rmap.bs > 1)
2494     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2495 
2496   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2497     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2498       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2499       its--;
2500     }
2501 
2502     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2503     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2504     while (its--){
2505       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2506 
2507       /* lower diagonal part: bb1 = bb - B^T*xx */
2508       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2509       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2510 
2511       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2512       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2513       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2514 
2515       /* upper diagonal part: bb1 = bb1 - B*x */
2516       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2517       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2518 
2519       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2520 
2521       /* diagonal sweep */
2522       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2523     }
2524     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2525     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2526   } else {
2527     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2528   }
2529   PetscFunctionReturn(0);
2530 }
2531 
2532