xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision bd920abb044c96993aaee00a1f0a41db581d8359)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements that are already in single
26    precision and inserts them into the single precision data structures. The function
27    MatSetValuesBlocked_MPISBAIJ() converts the entries to single precision and then calls
28    ..._MatScalar() to store them; see the illustrative sketch after this #if block.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
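/* Illustrative sketch (not compiled here) of the conversion pattern described
   above, assuming PETSC_USE_MAT_SINGLE so that MatScalar is single precision
   while PetscScalar is double:

     PetscScalar vdouble = 3.14;                 <- what the caller passes in
     MatScalar   vsingle = (MatScalar)vdouble;   <- the conversion each
                                                    MatSetValues*() performs
     ..._MatScalar(mat,1,&i,1,&j,&vsingle,INSERT_VALUES);

   The real conversion loop, including the reused setvaluescopy buffer, is in
   MatSetValues_MPISBAIJ() below. */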
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
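/* The two macros below insert (or add) the scalar `value` at local position
   (row,col) of the diagonal SeqSBAIJ part A and the off-diagonal SeqBAIJ
   part B, respectively: a short binary search plus linear scan locates the
   block column, the entry is updated in place if its block already exists,
   and otherwise the row is shifted and (re)allocated via
   MatSeqXAIJReallocateAIJ(), honoring the nonew flag. */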
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
103       N = nrow++ - 1;  \
104       /* shift up all the later entries in this row */ \
105       for (ii=N; ii>=_i; ii--) { \
106         rp[ii+1] = rp[ii]; \
107         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
108       } \
109       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
110       rp[_i]                      = bcol;  \
111       ap[bs2*_i + bs*cidx + ridx] = value;  \
112       a_noinsert:; \
113     ailen[brow] = nrow; \
114 }
115 #ifndef MatSetValues_SeqSBAIJ_B_Private
116 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
117 { \
118     brow = row/bs;  \
119     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
120     rmax = bimax[brow]; nrow = bilen[brow]; \
121       bcol = col/bs; \
122       ridx = row % bs; cidx = col % bs; \
123       low = 0; high = nrow; \
124       while (high-low > 3) { \
125         t = (low+high)/2; \
126         if (rp[t] > bcol) high = t; \
127         else              low  = t; \
128       } \
129       for (_i=low; _i<high; _i++) { \
130         if (rp[_i] > bcol) break; \
131         if (rp[_i] == bcol) { \
132           bap  = ap +  bs2*_i + bs*cidx + ridx; \
133           if (addv == ADD_VALUES) *bap += value;  \
134           else                    *bap  = value;  \
135           goto b_noinsert; \
136         } \
137       } \
138       if (b->nonew == 1) goto b_noinsert; \
139       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
140       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       b_noinsert:; \
151     bilen[brow] = nrow; \
152 }
153 #endif
154 
155 #if defined(PETSC_USE_MAT_SINGLE)
156 #undef __FUNCT__
157 #define __FUNCT__ "MatSetValues_MPISBAIJ"
158 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
159 {
160   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
161   PetscErrorCode ierr;
162   PetscInt       i,N = m*n;
163   MatScalar      *vsingle;
164 
165   PetscFunctionBegin;
166   if (N > b->setvalueslen) {
167     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
168     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
169     b->setvalueslen  = N;
170   }
171   vsingle = b->setvaluescopy;
172 
173   for (i=0; i<N; i++) {
174     vsingle[i] = v[i];
175   }
176   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
182 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
183 {
184   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
185   PetscErrorCode ierr;
186   PetscInt       i,N = m*n*b->bs2;
187   MatScalar      *vsingle;
188 
189   PetscFunctionBegin;
190   if (N > b->setvalueslen) {
191     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
192     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
193     b->setvalueslen  = N;
194   }
195   vsingle = b->setvaluescopy;
196   for (i=0; i<N; i++) {
197     vsingle[i] = v[i];
198   }
199   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 #endif
203 
204 /* Only add/insert a(i,j) with i<=j (blocks).
205    Any a(i,j) with i>j input by the user is ignored; see the usage sketch below.
206 */
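/* Usage sketch (illustrative only; assumes an MPISBAIJ matrix mat with
   bs = 1 and default options):

     PetscInt    i = 1, j = 0;
     PetscScalar v = 2.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);

   Since (i,j) = (1,0) lies in the lower triangle, this call raises
   PETSC_ERR_USER, or is silently dropped if ignore_ltriangular is set
   (e.g. by -mat_ignore_lower_triangular or
   MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)). */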
207 #undef __FUNCT__
208 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
209 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
210 {
211   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
212   MatScalar      value;
213   PetscTruth     roworiented = baij->roworiented;
214   PetscErrorCode ierr;
215   PetscInt       i,j,row,col;
216   PetscInt       rstart_orig=mat->rmap.rstart;
217   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
218   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
219 
220   /* Some Variables required in the macro */
221   Mat            A = baij->A;
222   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
223   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224   MatScalar      *aa=a->a;
225 
226   Mat            B = baij->B;
227   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
228   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229   MatScalar     *ba=b->a;
230 
231   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
232   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
233   MatScalar     *ap,*bap;
234 
235   /* for stash */
236   PetscInt      n_loc, *in_loc = PETSC_NULL;
237   MatScalar     *v_loc = PETSC_NULL;
238 
239   PetscFunctionBegin;
240 
241   if (!baij->donotstash){
242     if (n > baij->n_loc) {
243       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
244       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
245       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
246       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
247       baij->n_loc = n;
248     }
249     in_loc = baij->in_loc;
250     v_loc  = baij->v_loc;
251   }
252 
253   for (i=0; i<m; i++) {
254     if (im[i] < 0) continue;
255 #if defined(PETSC_USE_DEBUG)
256     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
257 #endif
258     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259       row = im[i] - rstart_orig;              /* local row index */
260       for (j=0; j<n; j++) {
261         if (im[i]/bs > in[j]/bs){
262           if (a->ignore_ltriangular){
263             continue;    /* ignore lower triangular blocks */
264           } else {
265             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
266           }
267         }
268         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
269           col = in[j] - cstart_orig;          /* local col index */
270           brow = row/bs; bcol = col/bs;
271           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
272           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
274           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
275         } else if (in[j] < 0) continue;
276 #if defined(PETSC_USE_DEBUG)
277         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
278 #endif
279         else {  /* off-diag entry (B) */
280           if (mat->was_assembled) {
281             if (!baij->colmap) {
282               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
283             }
284 #if defined (PETSC_USE_CTABLE)
285             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
286             col  = col - 1;
287 #else
288             col = baij->colmap[in[j]/bs] - 1;
289 #endif
290             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
291               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
292               col =  in[j];
293               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
294               B = baij->B;
295               b = (Mat_SeqBAIJ*)(B)->data;
296               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
297               ba=b->a;
298             } else col += in[j]%bs;
299           } else col = in[j];
300           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
301           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
302           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
303         }
304       }
305     } else {  /* off processor entry */
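      /* entries owned by another process are buffered in the stash (upper
         triangle only) and communicated during MatAssemblyBegin/End */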
306       if (!baij->donotstash) {
307         n_loc = 0;
308         for (j=0; j<n; j++){
309           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
310           in_loc[n_loc] = in[j];
311           if (roworiented) {
312             v_loc[n_loc] = v[i*n+j];
313           } else {
314             v_loc[n_loc] = v[j*m+i];
315           }
316           n_loc++;
317         }
318         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
319       }
320     }
321   }
322   PetscFunctionReturn(0);
323 }
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
327 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
328 {
329   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
330   const MatScalar *value;
331   MatScalar       *barray=baij->barray;
332   PetscTruth      roworiented = baij->roworiented;
333   PetscErrorCode  ierr;
334   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
335   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
336   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
337 
338   PetscFunctionBegin;
339   if(!barray) {
340     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
341     baij->barray = barray;
342   }
343 
344   if (roworiented) {
345     stepval = (n-1)*bs;
346   } else {
347     stepval = (m-1)*bs;
348   }
349   for (i=0; i<m; i++) {
350     if (im[i] < 0) continue;
351 #if defined(PETSC_USE_DEBUG)
352     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
353 #endif
354     if (im[i] >= rstart && im[i] < rend) {
355       row = im[i] - rstart;
356       for (j=0; j<n; j++) {
357         /* If n == 1 (row-oriented) or m == 1 (column-oriented), no copy is required */
358         if ((roworiented) && (n == 1)) {
359           barray = (MatScalar*) v + i*bs2;
360         } else if((!roworiented) && (m == 1)) {
361           barray = (MatScalar*) v + j*bs2;
362         } else { /* Here a copy is required */
363           if (roworiented) {
364             value = v + i*(stepval+bs)*bs + j*bs;
365           } else {
366             value = v + j*(stepval+bs)*bs + i*bs;
367           }
368           for (ii=0; ii<bs; ii++,value+=stepval) {
369             for (jj=0; jj<bs; jj++) {
370               *barray++  = *value++;
371             }
372           }
373           barray -=bs2;
374         }
375 
376         if (in[j] >= cstart && in[j] < cend){
377           col  = in[j] - cstart;
378           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
379         }
380         else if (in[j] < 0) continue;
381 #if defined(PETSC_USE_DEBUG)
382         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
383 #endif
384         else {
385           if (mat->was_assembled) {
386             if (!baij->colmap) {
387               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388             }
389 
390 #if defined(PETSC_USE_DEBUG)
391 #if defined (PETSC_USE_CTABLE)
392             { PetscInt data;
393               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
394               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
395             }
396 #else
397             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
398 #endif
399 #endif
400 #if defined (PETSC_USE_CTABLE)
401 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
402             col  = (col - 1)/bs;
403 #else
404             col = (baij->colmap[in[j]] - 1)/bs;
405 #endif
406             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
407               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
408               col =  in[j];
409             }
410           }
411           else col = in[j];
412           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
413         }
414       }
415     } else {
416       if (!baij->donotstash) {
417         if (roworiented) {
418           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
419         } else {
420           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
421         }
422       }
423     }
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatGetValues_MPISBAIJ"
430 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
431 {
432   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
433   PetscErrorCode ierr;
434   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
435   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
436 
437   PetscFunctionBegin;
438   for (i=0; i<m; i++) {
439     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
440     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
441     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
442       row = idxm[i] - bsrstart;
443       for (j=0; j<n; j++) {
444         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
445         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
446         if (idxn[j] >= bscstart && idxn[j] < bscend){
447           col = idxn[j] - bscstart;
448           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
449         } else {
450           if (!baij->colmap) {
451             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
452           }
453 #if defined (PETSC_USE_CTABLE)
454           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
455           data --;
456 #else
457           data = baij->colmap[idxn[j]/bs]-1;
458 #endif
459           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
460           else {
461             col  = data + idxn[j]%bs;
462             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
463           }
464         }
465       }
466     } else {
467       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
468     }
469   }
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatNorm_MPISBAIJ"
475 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
476 {
477   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
478   PetscErrorCode ierr;
479   PetscReal      sum[2],*lnorm2;
480 
481   PetscFunctionBegin;
482   if (baij->size == 1) {
483     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
484   } else {
485     if (type == NORM_FROBENIUS) {
486       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
487       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
488       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
489       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
490       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;            /* square of norm(B) */
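      /* each stored block of B also represents its transpose in the unstored
         lower triangle, hence the factor 2 below; the diagonal part A is
         assumed to account for its own symmetry inside MatNorm() */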
491       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
492       *norm = sqrt(sum[0] + 2*sum[1]);
493       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
494     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
495       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
496       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
497       PetscReal    *rsum,*rsum2,vabs;
498       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
499       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
500       MatScalar    *v;
501 
502       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
503       rsum2 = rsum + mat->cmap.N;
504       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
505       /* Amat */
506       v = amat->a; jj = amat->j;
507       for (brow=0; brow<mbs; brow++) {
508         grow = bs*(rstart + brow);
509         nz = amat->i[brow+1] - amat->i[brow];
510         for (bcol=0; bcol<nz; bcol++){
511           gcol = bs*(rstart + *jj); jj++;
512           for (col=0; col<bs; col++){
513             for (row=0; row<bs; row++){
514               vabs = PetscAbsScalar(*v); v++;
515               rsum[gcol+col] += vabs;
516               /* non-diagonal block */
517               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
518             }
519           }
520         }
521       }
522       /* Bmat */
523       v = bmat->a; jj = bmat->j;
524       for (brow=0; brow<mbs; brow++) {
525         grow = bs*(rstart + brow);
526         nz = bmat->i[brow+1] - bmat->i[brow];
527         for (bcol=0; bcol<nz; bcol++){
528           gcol = bs*garray[*jj]; jj++;
529           for (col=0; col<bs; col++){
530             for (row=0; row<bs; row++){
531               vabs = PetscAbsScalar(*v); v++;
532               rsum[gcol+col] += vabs;
533               rsum[grow+row] += vabs;
534             }
535           }
536         }
537       }
538       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
539       *norm = 0.0;
540       for (col=0; col<mat->cmap.N; col++) {
541         if (rsum2[col] > *norm) *norm = rsum2[col];
542       }
543       ierr = PetscFree(rsum);CHKERRQ(ierr);
544     } else {
545       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
546     }
547   }
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
553 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
554 {
555   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
556   PetscErrorCode ierr;
557   PetscInt       nstash,reallocs;
558   InsertMode     addv;
559 
560   PetscFunctionBegin;
561   if (baij->donotstash) {
562     PetscFunctionReturn(0);
563   }
564 
565   /* make sure all processors are using either INSERT_VALUES or ADD_VALUES, not a mix */
566   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
567   if (addv == (ADD_VALUES|INSERT_VALUES)) {
568     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
569   }
570   mat->insertmode = addv; /* in case this processor had no cache */
571 
572   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
573   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
574   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
575   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
576   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
577   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
583 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
584 {
585   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
586   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
589   PetscInt       *row,*col,other_disassembled;
590   PetscMPIInt    n;
591   PetscTruth     r1,r2,r3;
592   MatScalar      *val;
593   InsertMode     addv = mat->insertmode;
594 
595   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
596   PetscFunctionBegin;
597 
598   if (!baij->donotstash) {
599     while (1) {
600       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
601       if (!flg) break;
602 
603       for (i=0; i<n;) {
604         /* Now identify the consecutive vals belonging to the same row */
605         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
606         if (j < n) ncols = j-i;
607         else       ncols = n-i;
608         /* Now assemble all these values with a single function call */
609         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
610         i = j;
611       }
612     }
613     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
614     /* Now process the block-stash. Since the values are stashed column-oriented,
615        clear the roworiented flags before MatSetValues() and restore the
616        original flags afterwards */
617     r1 = baij->roworiented;
618     r2 = a->roworiented;
619     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
620     baij->roworiented = PETSC_FALSE;
621     a->roworiented    = PETSC_FALSE;
622     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
623     while (1) {
624       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
625       if (!flg) break;
626 
627       for (i=0; i<n;) {
628         /* Now identify the consecutive vals belonging to the same row */
629         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
630         if (j < n) ncols = j-i;
631         else       ncols = n-i;
632         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
633         i = j;
634       }
635     }
636     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
637     baij->roworiented = r1;
638     a->roworiented    = r2;
639     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
640   }
641 
642   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
643   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
644 
645   /* determine if any processor has disassembled; if so we must
646      also disassemble ourselves so that we may reassemble. */
647   /*
648      if nonzero structure of submatrix B cannot change then we know that
649      no processor disassembled thus we can skip this stuff
650   */
651   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
652     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
653     if (mat->was_assembled && !other_disassembled) {
654       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
655     }
656   }
657 
658   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
659     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
660   }
661   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
662   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
663   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
664 
665   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
666   baij->rowvalues = 0;
667 
668   PetscFunctionReturn(0);
669 }
670 
671 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
672 #undef __FUNCT__
673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
675 {
676   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
677   PetscErrorCode    ierr;
678   PetscInt          bs = mat->rmap.bs;
679   PetscMPIInt       size = baij->size,rank = baij->rank;
680   PetscTruth        iascii,isdraw;
681   PetscViewer       sviewer;
682   PetscViewerFormat format;
683 
684   PetscFunctionBegin;
685   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
687   if (iascii) {
688     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
689     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
690       MatInfo info;
691       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
692       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
694               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
695               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
697       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
698       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
699       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
700       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
702       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
703       PetscFunctionReturn(0);
704     } else if (format == PETSC_VIEWER_ASCII_INFO) {
705       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
706       PetscFunctionReturn(0);
707     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
708       PetscFunctionReturn(0);
709     }
710   }
711 
712   if (isdraw) {
713     PetscDraw  draw;
714     PetscTruth isnull;
715     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
716     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
717   }
718 
719   if (size == 1) {
720     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
721     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
722   } else {
723     /* assemble the entire matrix onto first processor. */
724     Mat          A;
725     Mat_SeqSBAIJ *Aloc;
726     Mat_SeqBAIJ  *Bloc;
727     PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
728     MatScalar    *a;
729 
730     /* Should this be the same type as mat? */
731     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
732     if (!rank) {
733       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
734     } else {
735       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
736     }
737     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
738     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
739     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
740 
741     /* copy over the A part */
742     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
743     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
744     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
745 
746     for (i=0; i<mbs; i++) {
747       rvals[0] = bs*(baij->rstartbs + i);
748       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
749       for (j=ai[i]; j<ai[i+1]; j++) {
750         col = (baij->cstartbs+aj[j])*bs;
751         for (k=0; k<bs; k++) {
752           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
753           col++; a += bs;
754         }
755       }
756     }
757     /* copy over the B part */
758     Bloc = (Mat_SeqBAIJ*)baij->B->data;
759     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
760     for (i=0; i<mbs; i++) {
761 
762       rvals[0] = bs*(baij->rstartbs + i);
763       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
764       for (j=ai[i]; j<ai[i+1]; j++) {
765         col = baij->garray[aj[j]]*bs;
766         for (k=0; k<bs; k++) {
767           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
768           col++; a += bs;
769         }
770       }
771     }
772     ierr = PetscFree(rvals);CHKERRQ(ierr);
773     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
774     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
775     /*
776        Everyone has to participate in drawing the matrix since the graphics waits
777        are synchronized across all processors that share the PetscDraw object
778     */
779     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
780     if (!rank) {
781       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
782       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
783     }
784     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
785     ierr = MatDestroy(A);CHKERRQ(ierr);
786   }
787   PetscFunctionReturn(0);
788 }
789 
790 #undef __FUNCT__
791 #define __FUNCT__ "MatView_MPISBAIJ"
792 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
793 {
794   PetscErrorCode ierr;
795   PetscTruth     iascii,isdraw,issocket,isbinary;
796 
797   PetscFunctionBegin;
798   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
800   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
801   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
802   if (iascii || isdraw || issocket || isbinary) {
803     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
804   } else {
805     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
806   }
807   PetscFunctionReturn(0);
808 }
809 
810 #undef __FUNCT__
811 #define __FUNCT__ "MatDestroy_MPISBAIJ"
812 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
813 {
814   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
815   PetscErrorCode ierr;
816 
817   PetscFunctionBegin;
818 #if defined(PETSC_USE_LOG)
819   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
820 #endif
821   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
822   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
823   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
824   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
825 #if defined (PETSC_USE_CTABLE)
826   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
827 #else
828   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
829 #endif
830   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
831   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
832   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
833   if (baij->slvec0) {
834     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
835     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
836   }
837   if (baij->slvec1) {
838     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
839     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
840     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
841   }
842   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
843   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
844   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
845   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
846 #if defined(PETSC_USE_MAT_SINGLE)
847   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
848 #endif
849   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
850   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
851   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
852   ierr = PetscFree(baij);CHKERRQ(ierr);
853 
854   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
859   PetscFunctionReturn(0);
860 }
861 
862 #undef __FUNCT__
863 #define __FUNCT__ "MatMult_MPISBAIJ"
864 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
865 {
866   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
867   PetscErrorCode ierr;
868   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
869   PetscScalar    *x,*from,zero=0.0;
870 
871   PetscFunctionBegin;
872   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
873   if (nt != A->cmap.n) {
874     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
875   }
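  /* Overall scheme (one scatter per product): with x the local part of xx,
     slvec0 is packed as [x ; B^T x]; the ADD_VALUES forward scatter then
     adds the off-process B^T x contributions into slvec1a (which already
     holds A x) and gathers the ghost values of x into slvec1b, so that
     yy = B*slvec1b + slvec1a completes the symmetric product. */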
876 
877   /* diagonal part */
878   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
879   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
880 
881   /* subdiagonal part */
882   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
883   CHKMEMQ;
884   /* copy x into the vec slvec0 */
885   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
886   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
887   CHKMEMQ;
888   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
889   CHKMEMQ;
890   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
891 
892   CHKMEMQ;
893   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894   CHKMEMQ;
895   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
896   CHKMEMQ;
897   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
898   CHKMEMQ;
899   /* superdiagonal part */
900   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
901   CHKMEMQ;
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
907 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
908 {
909   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
910   PetscErrorCode ierr;
911   PetscInt       nt;
912 
913   PetscFunctionBegin;
914   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
915   if (nt != A->cmap.n) {
916     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
917   }
918   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
919   if (nt != A->rmap.n) {
920     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
921   }
922 
923   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
924   /* do diagonal part */
925   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
926   /* do superdiagonal part */
927   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
928   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
929   /* do subdiagonal part */
930   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
931   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
932   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
933 
934   PetscFunctionReturn(0);
935 }
936 
937 #undef __FUNCT__
938 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
939 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
940 {
941   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
942   PetscErrorCode ierr;
943   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
944   PetscScalar    *x,*from,zero=0.0;
945 
946   PetscFunctionBegin;
947   /*
948   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
949   PetscSynchronizedFlush(((PetscObject)A)->comm);
950   */
951   /* diagonal part */
952   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
953   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
954 
955   /* subdiagonal part */
956   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
957 
958   /* copy x into the vec slvec0 */
959   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
960   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
961   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
962   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
963 
964   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
965   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
966   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
967 
968   /* superdiagonal part */
969   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
970 
971   PetscFunctionReturn(0);
972 }
973 
974 #undef __FUNCT__
975 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
976 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
977 {
978   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
979   PetscErrorCode ierr;
980 
981   PetscFunctionBegin;
982   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
983   /* do diagonal part */
984   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
985   /* do superdiagonal part */
986   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
987   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
988 
989   /* do subdiagonal part */
990   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
991   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
992   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
993 
994   PetscFunctionReturn(0);
995 }
996 
997 /*
998   This only works correctly for square matrices where the subblock A->A is the
999   diagonal block
1000 */
1001 #undef __FUNCT__
1002 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1003 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1004 {
1005   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1006   PetscErrorCode ierr;
1007 
1008   PetscFunctionBegin;
1009   /* if (A->rmap.N != A->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1010   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1011   PetscFunctionReturn(0);
1012 }
1013 
1014 #undef __FUNCT__
1015 #define __FUNCT__ "MatScale_MPISBAIJ"
1016 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1017 {
1018   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1019   PetscErrorCode ierr;
1020 
1021   PetscFunctionBegin;
1022   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1023   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1024   PetscFunctionReturn(0);
1025 }
1026 
1027 #undef __FUNCT__
1028 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1029 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1030 {
1031   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1032   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1033   PetscErrorCode ierr;
1034   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1035   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1036   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1037 
1038   PetscFunctionBegin;
1039   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1040   mat->getrowactive = PETSC_TRUE;
1041 
1042   if (!mat->rowvalues && (idx || v)) {
1043     /*
1044         allocate enough space to hold information from the longest row.
1045     */
1046     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1047     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1048     PetscInt     max = 1,mbs = mat->mbs,tmp;
1049     for (i=0; i<mbs; i++) {
1050       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1051       if (max < tmp) { max = tmp; }
1052     }
1053     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1054     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1055   }
1056 
1057   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1058   lrow = row - brstart;  /* local row index */
1059 
1060   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1061   if (!v)   {pvA = 0; pvB = 0;}
1062   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1063   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1064   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1065   nztot = nzA + nzB;
1066 
1067   cmap  = mat->garray;
1068   if (v  || idx) {
1069     if (nztot) {
1070       /* Sort by increasing column numbers, assuming A and B already sorted */
1071       PetscInt imark = -1;
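      /* imark counts the B entries whose global block column precedes the
         diagonal block: merged order is B[0..imark), all of A, B[imark..nzB) */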
1072       if (v) {
1073         *v = v_p = mat->rowvalues;
1074         for (i=0; i<nzB; i++) {
1075           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1076           else break;
1077         }
1078         imark = i;
1079         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1080         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1081       }
1082       if (idx) {
1083         *idx = idx_p = mat->rowindices;
1084         if (imark > -1) {
1085           for (i=0; i<imark; i++) {
1086             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1087           }
1088         } else {
1089           for (i=0; i<nzB; i++) {
1090             if (cmap[cworkB[i]/bs] < cstart)
1091               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1092             else break;
1093           }
1094           imark = i;
1095         }
1096         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1097         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1098       }
1099     } else {
1100       if (idx) *idx = 0;
1101       if (v)   *v   = 0;
1102     }
1103   }
1104   *nz = nztot;
1105   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1106   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1112 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1113 {
1114   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1115 
1116   PetscFunctionBegin;
1117   if (!baij->getrowactive) {
1118     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1119   }
1120   baij->getrowactive = PETSC_FALSE;
1121   PetscFunctionReturn(0);
1122 }
1123 
1124 #undef __FUNCT__
1125 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1126 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1127 {
1128   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1129   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1130 
1131   PetscFunctionBegin;
1132   aA->getrow_utriangular = PETSC_TRUE;
1133   PetscFunctionReturn(0);
1134 }
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1137 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1138 {
1139   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1140   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1141 
1142   PetscFunctionBegin;
1143   aA->getrow_utriangular = PETSC_FALSE;
1144   PetscFunctionReturn(0);
1145 }
1146 
1147 #undef __FUNCT__
1148 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1149 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1150 {
1151   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1152   PetscErrorCode ierr;
1153 
1154   PetscFunctionBegin;
1155   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1156   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1157   PetscFunctionReturn(0);
1158 }
1159 
1160 #undef __FUNCT__
1161 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1162 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1163 {
1164   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1165   PetscErrorCode ierr;
1166 
1167   PetscFunctionBegin;
1168   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1169   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1170   PetscFunctionReturn(0);
1171 }
1172 
1173 #undef __FUNCT__
1174 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1175 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1176 {
1177   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1178   PetscErrorCode ierr;
1179 
1180   PetscFunctionBegin;
1181   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1182   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1183   PetscFunctionReturn(0);
1184 }
1185 
1186 #undef __FUNCT__
1187 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1188 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1189 {
1190   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1191   Mat            A = a->A,B = a->B;
1192   PetscErrorCode ierr;
1193   PetscReal      isend[5],irecv[5];
1194 
1195   PetscFunctionBegin;
1196   info->block_size     = (PetscReal)matin->rmap.bs;
1197   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1198   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1199   isend[3] = info->memory;  isend[4] = info->mallocs;
1200   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1201   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1202   isend[3] += info->memory;  isend[4] += info->mallocs;
1203   if (flag == MAT_LOCAL) {
1204     info->nz_used      = isend[0];
1205     info->nz_allocated = isend[1];
1206     info->nz_unneeded  = isend[2];
1207     info->memory       = isend[3];
1208     info->mallocs      = isend[4];
1209   } else if (flag == MAT_GLOBAL_MAX) {
1210     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1211     info->nz_used      = irecv[0];
1212     info->nz_allocated = irecv[1];
1213     info->nz_unneeded  = irecv[2];
1214     info->memory       = irecv[3];
1215     info->mallocs      = irecv[4];
1216   } else if (flag == MAT_GLOBAL_SUM) {
1217     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1218     info->nz_used      = irecv[0];
1219     info->nz_allocated = irecv[1];
1220     info->nz_unneeded  = irecv[2];
1221     info->memory       = irecv[3];
1222     info->mallocs      = irecv[4];
1223   } else {
1224     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1225   }
1226   info->rows_global       = (PetscReal)A->rmap.N;
1227   info->columns_global    = (PetscReal)A->cmap.N;
1228   info->rows_local        = (PetscReal)A->rmap.N;
1229   info->columns_local     = (PetscReal)A->cmap.N;
1230   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1231   info->fill_ratio_needed = 0;
1232   info->factor_mallocs    = 0;
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1238 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1239 {
1240   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1241   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1242   PetscErrorCode ierr;
1243 
1244   PetscFunctionBegin;
1245   switch (op) {
1246   case MAT_NEW_NONZERO_LOCATIONS:
1247   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1248   case MAT_KEEP_ZEROED_ROWS:
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1251     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1252     break;
1253   case MAT_ROW_ORIENTED:
1254     a->roworiented = flg;
1255     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1256     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1257     break;
1258   case MAT_NEW_DIAGONALS:
1259     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1260     break;
1261   case MAT_IGNORE_OFF_PROC_ENTRIES:
1262     a->donotstash = flg;
1263     break;
1264   case MAT_USE_HASH_TABLE:
1265     a->ht_flag = flg;
1266     break;
1267   case MAT_HERMITIAN:
1268     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
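    /* fall through: MAT_HERMITIAN shares the handling of the symmetric options below */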
1269   case MAT_SYMMETRIC:
1270   case MAT_STRUCTURALLY_SYMMETRIC:
1271   case MAT_SYMMETRY_ETERNAL:
1272     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1273     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1274     break;
1275   case MAT_IGNORE_LOWER_TRIANGULAR:
1276     aA->ignore_ltriangular = flg;
1277     break;
1278   case MAT_ERROR_LOWER_TRIANGULAR:
1279     aA->ignore_ltriangular = flg;
1280     break;
1281   case MAT_GETROW_UPPERTRIANGULAR:
1282     aA->getrow_utriangular = flg;
1283     break;
1284   default:
1285     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1286   }
1287   PetscFunctionReturn(0);
1288 }
1289 
1290 #undef __FUNCT__
1291 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1292 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1293 {
1294   PetscErrorCode ierr;
1295   PetscFunctionBegin;
1296   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1297   PetscFunctionReturn(0);
1298 }
1299 
1300 #undef __FUNCT__
1301 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1302 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1303 {
1304   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1305   Mat            a=baij->A, b=baij->B;
1306   PetscErrorCode ierr;
1307   PetscInt       nv,m,n;
1308   PetscTruth     flg;
1309 
1310   PetscFunctionBegin;
1311   if (ll != rr){
1312     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1313     if (!flg)
1314       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1315   }
1316   if (!ll) PetscFunctionReturn(0);
1317 
1318   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1319   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1320 
1321   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1322   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
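  /* symmetric scaling mat <- D*mat*D with D = diag(ll) = diag(rr): the local
     parts are scaled by ll directly, while the columns of the off-diagonal
     part B are scaled by the ghost entries of ll gathered by the scatter below */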
1323 
1324   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1325 
1326   /* left diagonalscale the off-diagonal part */
1327   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1328 
1329   /* scale the diagonal part */
1330   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1331 
1332   /* right diagonalscale the off-diagonal part */
1333   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1334   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1335   PetscFunctionReturn(0);
1336 }
1337 
1338 #undef __FUNCT__
1339 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1340 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1341 {
1342   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1343   PetscErrorCode ierr;
1344 
1345   PetscFunctionBegin;
1346   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatEqual_MPISBAIJ"
1354 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1355 {
1356   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1357   Mat            a,b,c,d;
1358   PetscTruth     flg;
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   a = matA->A; b = matA->B;
1363   c = matB->A; d = matB->B;
1364 
1365   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1366   if (flg) {
1367     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1368   }
1369   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1370   PetscFunctionReturn(0);
1371 }
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatCopy_MPISBAIJ"
1375 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1376 {
1377   PetscErrorCode ierr;
1378   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1379   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1380 
1381   PetscFunctionBegin;
1382   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1383   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1384     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1385     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1386     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1387   } else {
1388     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1389     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1390   }
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1396 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1397 {
1398   PetscErrorCode ierr;
1399 
1400   PetscFunctionBegin;
1401   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #include "petscblaslapack.h"
1406 #undef __FUNCT__
1407 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1408 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1409 {
1410   PetscErrorCode ierr;
1411   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1412   PetscBLASInt   bnz,one=1;
1413   Mat_SeqSBAIJ   *xa,*ya;
1414   Mat_SeqBAIJ    *xb,*yb;
1415 
1416   PetscFunctionBegin;
1417   if (str == SAME_NONZERO_PATTERN) {
1418     PetscScalar alpha = a;
1419     xa = (Mat_SeqSBAIJ *)xx->A->data;
1420     ya = (Mat_SeqSBAIJ *)yy->A->data;
1421     bnz = (PetscBLASInt)xa->nz;
1422     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1423     xb = (Mat_SeqBAIJ *)xx->B->data;
1424     yb = (Mat_SeqBAIJ *)yy->B->data;
1425     bnz = (PetscBLASInt)xb->nz;
1426     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1427   } else {
1428     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1429     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1430     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1431   }
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #undef __FUNCT__
1436 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1437 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1438 {
1439   PetscErrorCode ierr;
1440   PetscInt       i;
1441   PetscTruth     flg;
1442 
1443   PetscFunctionBegin;
1444   for (i=0; i<n; i++) {
1445     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1446     if (!flg) {
1447       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1448     }
1449   }
1450   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1451   PetscFunctionReturn(0);
1452 }
1453 
1454 
1455 /* -------------------------------------------------------------------*/
1456 static struct _MatOps MatOps_Values = {
1457        MatSetValues_MPISBAIJ,
1458        MatGetRow_MPISBAIJ,
1459        MatRestoreRow_MPISBAIJ,
1460        MatMult_MPISBAIJ,
1461 /* 4*/ MatMultAdd_MPISBAIJ,
1462        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1463        MatMultAdd_MPISBAIJ,
1464        0,
1465        0,
1466        0,
1467 /*10*/ 0,
1468        0,
1469        0,
1470        MatRelax_MPISBAIJ,
1471        MatTranspose_MPISBAIJ,
1472 /*15*/ MatGetInfo_MPISBAIJ,
1473        MatEqual_MPISBAIJ,
1474        MatGetDiagonal_MPISBAIJ,
1475        MatDiagonalScale_MPISBAIJ,
1476        MatNorm_MPISBAIJ,
1477 /*20*/ MatAssemblyBegin_MPISBAIJ,
1478        MatAssemblyEnd_MPISBAIJ,
1479        0,
1480        MatSetOption_MPISBAIJ,
1481        MatZeroEntries_MPISBAIJ,
1482 /*25*/ 0,
1483        0,
1484        0,
1485        0,
1486        0,
1487 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1488        0,
1489        0,
1490        0,
1491        0,
1492 /*35*/ MatDuplicate_MPISBAIJ,
1493        0,
1494        0,
1495        0,
1496        0,
1497 /*40*/ MatAXPY_MPISBAIJ,
1498        MatGetSubMatrices_MPISBAIJ,
1499        MatIncreaseOverlap_MPISBAIJ,
1500        MatGetValues_MPISBAIJ,
1501        MatCopy_MPISBAIJ,
1502 /*45*/ 0,
1503        MatScale_MPISBAIJ,
1504        0,
1505        0,
1506        0,
1507 /*50*/ 0,
1508        0,
1509        0,
1510        0,
1511        0,
1512 /*55*/ 0,
1513        0,
1514        MatSetUnfactored_MPISBAIJ,
1515        0,
1516        MatSetValuesBlocked_MPISBAIJ,
1517 /*60*/ 0,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*65*/ 0,
1523        0,
1524        0,
1525        0,
1526        0,
1527 /*70*/ MatGetRowMaxAbs_MPISBAIJ,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*75*/ 0,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*80*/ 0,
1538        0,
1539        0,
1540        0,
1541        MatLoad_MPISBAIJ,
1542 /*85*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*90*/ 0,
1548        0,
1549        0,
1550        0,
1551        0,
1552 /*95*/ 0,
1553        0,
1554        0,
1555        0,
1556        0,
1557 /*100*/0,
1558        0,
1559        0,
1560        0,
1561        0,
1562 /*105*/0,
1563        MatRealPart_MPISBAIJ,
1564        MatImaginaryPart_MPISBAIJ,
1565        MatGetRowUpperTriangular_MPISBAIJ,
1566        MatRestoreRowUpperTriangular_MPISBAIJ
1567 };
1568 
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1573 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1574 {
1575   PetscFunctionBegin;
1576   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1577   *iscopy = PETSC_FALSE;
1578   PetscFunctionReturn(0);
1579 }
1580 EXTERN_C_END
1581 
1582 EXTERN_C_BEGIN
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1585 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1586 {
1587   Mat_MPISBAIJ   *b;
1588   PetscErrorCode ierr;
1589   PetscInt       i,mbs,Mbs;
1590 
1591   PetscFunctionBegin;
1592   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
    ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
1594   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1595 
1596   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1597   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1598   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1599   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1600   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1601 
1602   B->rmap.bs = B->cmap.bs = bs;
1603   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1604   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1605 
1606   if (d_nnz) {
1607     for (i=0; i<B->rmap.n/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1609     }
1610   }
1611   if (o_nnz) {
1612     for (i=0; i<B->rmap.n/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1614     }
1615   }
1616   B->preallocated = PETSC_TRUE;
1617 
1618   b   = (Mat_MPISBAIJ*)B->data;
1619   mbs = B->rmap.n/bs;
1620   Mbs = B->rmap.N/bs;
  if (mbs*bs != B->rmap.n) {
    SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
  }
1624 
1625   B->rmap.bs  = bs;
1626   b->bs2 = bs*bs;
1627   b->mbs = mbs;
1628   b->nbs = mbs;
1629   b->Mbs = Mbs;
1630   b->Nbs = Mbs;
1631 
1632   for (i=0; i<=b->size; i++) {
1633     b->rangebs[i] = B->rmap.range[i]/bs;
1634   }
1635   b->rstartbs = B->rmap.rstart/bs;
1636   b->rendbs   = B->rmap.rend/bs;
1637 
1638   b->cstartbs = B->cmap.rstart/bs;
1639   b->cendbs   = B->cmap.rend/bs;
1640 
1641   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1642   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1643   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1644   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1645   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1646 
1647   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1648   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1649   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1650   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1651   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1652 
  /* build cache for off-processor entries formed during assembly */
1654   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1655 
1656   PetscFunctionReturn(0);
1657 }
1658 EXTERN_C_END
1659 
1660 /*MC
1661    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1662    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1663 
1664    Options Database Keys:
1665 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1666 
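   Example:
   a minimal sketch of selecting this type explicitly (comm, M, and A are
   assumed to be supplied by the caller):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
.ve
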
1667   Level: beginner
1668 
.seealso: MatCreateMPISBAIJ()
1670 M*/
1671 
1672 EXTERN_C_BEGIN
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatCreate_MPISBAIJ"
1675 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1676 {
1677   Mat_MPISBAIJ   *b;
1678   PetscErrorCode ierr;
1679   PetscTruth     flg;
1680 
1681   PetscFunctionBegin;
1682 
1683   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1684   B->data = (void*)b;
1685   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1686 
1687   B->ops->destroy    = MatDestroy_MPISBAIJ;
1688   B->ops->view       = MatView_MPISBAIJ;
1689   B->mapping    = 0;
1690   B->factor     = 0;
1691   B->assembled  = PETSC_FALSE;
1692 
1693   B->insertmode = NOT_SET_VALUES;
1694   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1695   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1696 
1697   /* build local table of row and column ownerships */
1698   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1699 
  /* build cache for off-processor entries formed during assembly */
1701   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1702   b->donotstash  = PETSC_FALSE;
1703   b->colmap      = PETSC_NULL;
1704   b->garray      = PETSC_NULL;
1705   b->roworiented = PETSC_TRUE;
1706 
1707 #if defined(PETSC_USE_MAT_SINGLE)
1708   /* stuff for MatSetValues_XXX in single precision */
1709   b->setvalueslen     = 0;
1710   b->setvaluescopy    = PETSC_NULL;
1711 #endif
1712 
1713   /* stuff used in block assembly */
1714   b->barray       = 0;
1715 
1716   /* stuff used for matrix vector multiply */
1717   b->lvec         = 0;
1718   b->Mvctx        = 0;
1719   b->slvec0       = 0;
1720   b->slvec0b      = 0;
1721   b->slvec1       = 0;
1722   b->slvec1a      = 0;
1723   b->slvec1b      = 0;
1724   b->sMvctx       = 0;
1725 
1726   /* stuff for MatGetRow() */
1727   b->rowindices   = 0;
1728   b->rowvalues    = 0;
1729   b->getrowactive = PETSC_FALSE;
1730 
1731   /* hash table stuff */
1732   b->ht           = 0;
1733   b->hd           = 0;
1734   b->ht_size      = 0;
1735   b->ht_flag      = PETSC_FALSE;
1736   b->ht_fact      = 0;
1737   b->ht_total_ct  = 0;
1738   b->ht_insert_ct = 0;
1739 
1740   b->in_loc       = 0;
1741   b->v_loc        = 0;
1742   b->n_loc        = 0;
  ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for creating MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1744     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1745     if (flg) {
1746       PetscReal fact = 1.39;
1747       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1748       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1749       if (fact <= 1.0) fact = 1.39;
1750       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1751       ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1752     }
1753   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1754 
1755   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1756                                      "MatStoreValues_MPISBAIJ",
1757                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1758   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1759                                      "MatRetrieveValues_MPISBAIJ",
1760                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1761   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1762                                      "MatGetDiagonalBlock_MPISBAIJ",
1763                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1764   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1765                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1766                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1767   B->symmetric                  = PETSC_TRUE;
1768   B->structurally_symmetric     = PETSC_TRUE;
1769   B->symmetric_set              = PETSC_TRUE;
1770   B->structurally_symmetric_set = PETSC_TRUE;
1771   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1772   PetscFunctionReturn(0);
1773 }
1774 EXTERN_C_END
1775 
1776 /*MC
1777    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1778 
1779    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1780    and MATMPISBAIJ otherwise.
1781 
1782    Options Database Keys:
1783 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1784 
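   Example:
   a minimal sketch that lets the options database choose the type (comm, M,
   and A are assumed to be supplied by the caller; run with -mat_type sbaij):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,M);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
.ve
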
1785   Level: beginner
1786 
.seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
1788 M*/
1789 
1790 EXTERN_C_BEGIN
1791 #undef __FUNCT__
1792 #define __FUNCT__ "MatCreate_SBAIJ"
1793 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1794 {
1795   PetscErrorCode ierr;
1796   PetscMPIInt    size;
1797 
1798   PetscFunctionBegin;
1799   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1800   if (size == 1) {
1801     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1802   } else {
1803     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1804   }
1805   PetscFunctionReturn(0);
1806 }
1807 EXTERN_C_END
1808 
1809 #undef __FUNCT__
1810 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1811 /*@C
1812    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1813    the user should preallocate the matrix storage by setting the parameters
1814    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1815    performance can be increased by more than a factor of 50.
1816 
1817    Collective on Mat
1818 
1819    Input Parameters:
1820 +  A - the matrix
.  bs   - size of block
1822 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1823            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
           for the diagonal entry even if it is zero.
1828 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1829            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.
1833 
1834 
1835    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use
1839 
1840    Notes:
1841 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1844 
1845    If the *_nnz parameter is given then the *_nz parameter is ignored
1846 
1847    Storage Information:
1848    For a square global matrix we define each processor's diagonal portion
1849    to be its local rows and the corresponding columns (a square submatrix);
1850    each processor's off-diagonal portion encompasses the remainder of the
1851    local matrix (a rectangular submatrix).
1852 
1853    The user can specify preallocated storage for the diagonal part of
1854    the local submatrix with either d_nz or d_nnz (not both).  Set
1855    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1856    memory allocation.  Likewise, specify preallocated storage for the
1857    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1858 
1859    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1860    the figure below we depict these three local rows and all columns (0-11).
1861 
1862 .vb
1863            0 1 2 3 4 5 6 7 8 9 10 11
1864           -------------------
1865    row 3  |  o o o d d d o o o o o o
1866    row 4  |  o o o d d d o o o o o o
1867    row 5  |  o o o d d d o o o o o o
1868           -------------------
1869 .ve
1870 
1871    Thus, any entries in the d locations are stored in the d (diagonal)
1872    submatrix, and any entries in the o locations are stored in the
1873    o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1875 
1876    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1877    plus the diagonal part of the d matrix,
1878    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1879    In general, for PDE problems in which most nonzeros are near the diagonal,
1880    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1881    or you will get TERRIBLE performance; see the users' manual chapter on
1882    matrices.
1883 
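   Example:
   a minimal sketch for a matrix with 2x2 blocks, assuming A has already been
   created with type MATMPISBAIJ; the counts (2 block nonzeros per block row in
   the diagonal portion, 1 in the off-diagonal portion) are illustrative only:

.vb
   ierr = MatMPISBAIJSetPreallocation(A,2,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve
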
1884    Level: intermediate
1885 
1886 .keywords: matrix, block, aij, compressed row, sparse, parallel
1887 
1888 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1889 @*/
1890 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1891 {
1892   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1893 
1894   PetscFunctionBegin;
1895   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1896   if (f) {
1897     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatCreateMPISBAIJ"
1904 /*@C
1905    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1906    (block compressed row).  For good matrix assembly performance
1907    the user should preallocate the matrix storage by setting the parameters
1908    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1909    performance can be increased by more than a factor of 50.
1910 
1911    Collective on MPI_Comm
1912 
1913    Input Parameters:
1914 +  comm - MPI communicator
.  bs   - size of block
1916 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1917            This value should be the same as the local size used in creating the
1918            y vector for the matrix-vector product y = Ax.
1919 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1920            This value should be the same as the local size used in creating the
1921            x vector for the matrix-vector product y = Ax.
1922 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1923 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1924 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1925            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular portion of the diagonal portion of the local
           submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
1930 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1931            submatrix (same for all local rows).
-  o_nnz - array containing the number of block nonzeros in the various block rows of the
           off-diagonal portion of the local submatrix (possibly different for
           each block row) or PETSC_NULL.
1935 
1936    Output Parameter:
1937 .  A - the matrix
1938 
1939    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)
1945 
   It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
   MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
   true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
   [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
1950 
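   For example, a sketch of that paradigm (bs, d_nz and o_nz are assumed to be
   supplied by the caller; the preallocation call that does not match the type
   actually selected is simply ignored):

.vb
   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
   ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
   ierr = MatSeqSBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
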
1951    Notes:
1952    The number of rows and columns must be divisible by blocksize.
   This matrix type does not support complex Hermitian operations.
1954 
1955    The user MUST specify either the local or global matrix dimensions
1956    (possibly both).
1957 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1960 
1961    If the *_nnz parameter is given then the *_nz parameter is ignored
1962 
1963    Storage Information:
1964    For a square global matrix we define each processor's diagonal portion
1965    to be its local rows and the corresponding columns (a square submatrix);
1966    each processor's off-diagonal portion encompasses the remainder of the
1967    local matrix (a rectangular submatrix).
1968 
1969    The user can specify preallocated storage for the diagonal part of
1970    the local submatrix with either d_nz or d_nnz (not both).  Set
1971    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1972    memory allocation.  Likewise, specify preallocated storage for the
1973    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1974 
1975    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1976    the figure below we depict these three local rows and all columns (0-11).
1977 
1978 .vb
1979            0 1 2 3 4 5 6 7 8 9 10 11
1980           -------------------
1981    row 3  |  o o o d d d o o o o o o
1982    row 4  |  o o o d d d o o o o o o
1983    row 5  |  o o o d d d o o o o o o
1984           -------------------
1985 .ve
1986 
1987    Thus, any entries in the d locations are stored in the d (diagonal)
1988    submatrix, and any entries in the o locations are stored in the
1989    o (off-diagonal) submatrix.  Note that the d matrix is stored in
   MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1991 
1992    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1993    plus the diagonal part of the d matrix,
1994    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1995    In general, for PDE problems in which most nonzeros are near the diagonal,
1996    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1997    or you will get TERRIBLE performance; see the users' manual chapter on
1998    matrices.
1999 
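   Example:
   a minimal sketch creating a matrix with 2x2 blocks (comm and A come from the
   caller, M is assumed divisible by the block size, and the preallocation
   counts are illustrative only):

.vb
   ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,2,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
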
2000    Level: intermediate
2001 
2002 .keywords: matrix, block, aij, compressed row, sparse, parallel
2003 
2004 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2005 @*/
2006 
2007 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2008 {
2009   PetscErrorCode ierr;
2010   PetscMPIInt    size;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2014   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2015   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2016   if (size > 1) {
2017     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2018     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2019   } else {
2020     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2021     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2029 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2030 {
2031   Mat            mat;
2032   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2033   PetscErrorCode ierr;
2034   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2035   PetscScalar    *array;
2036 
2037   PetscFunctionBegin;
2038   *newmat       = 0;
2039   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2040   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2041   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2042   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2043   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2044   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2045 
2046   mat->factor       = matin->factor;
2047   mat->preallocated = PETSC_TRUE;
2048   mat->assembled    = PETSC_TRUE;
2049   mat->insertmode   = NOT_SET_VALUES;
2050 
2051   a = (Mat_MPISBAIJ*)mat->data;
2052   a->bs2   = oldmat->bs2;
2053   a->mbs   = oldmat->mbs;
2054   a->nbs   = oldmat->nbs;
2055   a->Mbs   = oldmat->Mbs;
2056   a->Nbs   = oldmat->Nbs;
2057 
2058 
2059   a->size         = oldmat->size;
2060   a->rank         = oldmat->rank;
2061   a->donotstash   = oldmat->donotstash;
2062   a->roworiented  = oldmat->roworiented;
2063   a->rowindices   = 0;
2064   a->rowvalues    = 0;
2065   a->getrowactive = PETSC_FALSE;
2066   a->barray       = 0;
2067   a->rstartbs    = oldmat->rstartbs;
2068   a->rendbs      = oldmat->rendbs;
2069   a->cstartbs    = oldmat->cstartbs;
2070   a->cendbs      = oldmat->cendbs;
2071 
2072   /* hash table stuff */
2073   a->ht           = 0;
2074   a->hd           = 0;
2075   a->ht_size      = 0;
2076   a->ht_flag      = oldmat->ht_flag;
2077   a->ht_fact      = oldmat->ht_fact;
2078   a->ht_total_ct  = 0;
2079   a->ht_insert_ct = 0;
2080 
2081   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2082   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2083   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2084   if (oldmat->colmap) {
2085 #if defined (PETSC_USE_CTABLE)
2086     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2087 #else
2088     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2089     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2090     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2091 #endif
2092   } else a->colmap = 0;
2093 
2094   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2095     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2096     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2097     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2098   } else a->garray = 0;
2099 
2100   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2101   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2102   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2103   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2104 
2105   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2106   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2107   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2108   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2109 
2110   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2111   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2113   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2114   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2115   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2116   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2117   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
  /* slvec0 and slvec1 were already logged above; register only the sub-vectors here */
  ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
  ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2123 
  /* VecScatterCopy() is not written for sMvctx yet, so take a reference to the old scatter instead: */
2125   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2126   a->sMvctx = oldmat->sMvctx;
2127   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2128 
2129   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2130   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2131   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2133   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2134   *newmat = mat;
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 #include "petscsys.h"
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatLoad_MPISBAIJ"
2142 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2143 {
2144   Mat            A;
2145   PetscErrorCode ierr;
2146   PetscInt       i,nz,j,rstart,rend;
2147   PetscScalar    *vals,*buf;
2148   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2149   MPI_Status     status;
2150   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2151   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2152   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2153   PetscInt       bs=1,Mbs,mbs,extra_rows;
2154   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2155   PetscInt       dcount,kmax,k,nzcount,tmp;
2156   int            fd;
2157 
2158   PetscFunctionBegin;
2159   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2160     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2161   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2162 
2163   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2164   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2165   if (!rank) {
2166     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2167     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2169     if (header[3] < 0) {
2170       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2171     }
2172   }
2173 
2174   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2175   M = header[1]; N = header[2];
2176 
2177   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2178 
2179   /*
2180      This code adds extra rows to make sure the number of rows is
2181      divisible by the blocksize
2182   */
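  /* Example (illustrative): M=10 with bs=4 gives Mbs=2 and extra_rows=2;
     Mbs is then bumped to 3, and each padded row later receives a single
     diagonal entry of 1.0 when the values are read in below. */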
2183   Mbs        = M/bs;
2184   extra_rows = bs - M + bs*(Mbs);
2185   if (extra_rows == bs) extra_rows = 0;
2186   else                  Mbs++;
  if (extra_rows && !rank) {
2188     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2189   }
2190 
2191   /* determine ownership of all rows */
2192   mbs        = Mbs/size + ((Mbs % size) > rank);
2193   m          = mbs*bs;
2194   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2195   browners   = rowners + size + 1;
2196   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2197   rowners[0] = 0;
2198   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2199   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2200   rstart = rowners[rank];
2201   rend   = rowners[rank+1];
2202 
2203   /* distribute row lengths to all processors */
2204   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2205   if (!rank) {
2206     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2207     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2208     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2209     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2210     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2211     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2212     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2213   } else {
2214     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2215   }
2216 
2217   if (!rank) {   /* procs[0] */
2218     /* calculate the number of nonzeros on each processor */
2219     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2220     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2221     for (i=0; i<size; i++) {
2222       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2223         procsnz[i] += rowlengths[j];
2224       }
2225     }
2226     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2227 
2228     /* determine max buffer needed and allocate it */
2229     maxnz = 0;
2230     for (i=0; i<size; i++) {
2231       maxnz = PetscMax(maxnz,procsnz[i]);
2232     }
2233     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2234 
2235     /* read in my part of the matrix column indices  */
2236     nz     = procsnz[0];
2237     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2238     mycols = ibuf;
2239     if (size == 1)  nz -= extra_rows;
2240     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2241     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2242 
    /* read in everyone else's part (except the last proc's) and ship it off */
2244     for (i=1; i<size-1; i++) {
2245       nz   = procsnz[i];
2246       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2247       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2248     }
    /* read in the part for the last proc */
2250     if (size != 1) {
2251       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2252       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2253       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2254       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2255     }
2256     ierr = PetscFree(cols);CHKERRQ(ierr);
2257   } else {  /* procs[i], i>0 */
2258     /* determine buffer space needed for message */
2259     nz = 0;
2260     for (i=0; i<m; i++) {
2261       nz += locrowlens[i];
2262     }
2263     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2264     mycols = ibuf;
2265     /* receive message of column indices*/
2266     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2267     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2269   }
2270 
2271   /* loop over local rows, determining number of off diagonal entries */
2272   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2273   odlens   = dlens + (rend-rstart);
2274   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2275   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2276   masked1  = mask    + Mbs;
2277   masked2  = masked1 + Mbs;
2278   rowcount = 0; nzcount = 0;
2279   for (i=0; i<mbs; i++) {
2280     dcount  = 0;
2281     odcount = 0;
2282     for (j=0; j<bs; j++) {
2283       kmax = locrowlens[rowcount];
2284       for (k=0; k<kmax; k++) {
2285         tmp = mycols[nzcount++]/bs; /* block col. index */
2286         if (!mask[tmp]) {
2287           mask[tmp] = 1;
2288           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2289           else masked1[dcount++] = tmp; /* entry in diag portion */
2290         }
2291       }
2292       rowcount++;
2293     }
2294 
2295     dlens[i]  = dcount;  /* d_nzz[i] */
2296     odlens[i] = odcount; /* o_nzz[i] */
2297 
2298     /* zero out the mask elements we set */
2299     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2300     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2301   }
2302 
2303   /* create our matrix */
2304   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2305   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2306   ierr = MatSetType(A,type);CHKERRQ(ierr);
2307   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2308 
2309   if (!rank) {
2310     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2311     /* read in my part of the matrix numerical values  */
2312     nz = procsnz[0];
2313     vals = buf;
2314     mycols = ibuf;
2315     if (size == 1)  nz -= extra_rows;
2316     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2317     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2318 
2319     /* insert into matrix */
2320     jj      = rstart*bs;
2321     for (i=0; i<m; i++) {
2322       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2323       mycols += locrowlens[i];
2324       vals   += locrowlens[i];
2325       jj++;
2326     }
2327 
    /* read in the other processors' values (except the last one's) and ship them out */
2329     for (i=1; i<size-1; i++) {
2330       nz   = procsnz[i];
2331       vals = buf;
2332       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2333       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2334     }
2335     /* the last proc */
    if (size != 1){
      nz   = procsnz[size-1] - extra_rows;  /* the extra padded rows are not on the disk */
2338       vals = buf;
2339       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2340       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2341       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2342     }
2343     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2344 
2345   } else {
2346     /* receive numeric values */
2347     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2348 
2349     /* receive message of values*/
2350     vals   = buf;
2351     mycols = ibuf;
2352     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2353     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2355 
2356     /* insert into matrix */
2357     jj      = rstart*bs;
2358     for (i=0; i<m; i++) {
2359       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2360       mycols += locrowlens[i];
2361       vals   += locrowlens[i];
2362       jj++;
2363     }
2364   }
2365 
2366   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2367   ierr = PetscFree(buf);CHKERRQ(ierr);
2368   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2369   ierr = PetscFree(rowners);CHKERRQ(ierr);
2370   ierr = PetscFree(dlens);CHKERRQ(ierr);
2371   ierr = PetscFree(mask);CHKERRQ(ierr);
2372   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2373   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2374   *newmat = A;
2375   PetscFunctionReturn(0);
2376 }
2377 
2378 #undef __FUNCT__
2379 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2380 /*XXXXX@
2381    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2382 
2383    Input Parameters:
2384 .  mat  - the matrix
2385 .  fact - factor
2386 
2387    Collective on Mat
2388 
2389    Level: advanced
2390 
2391   Notes:
2392    This can also be set by the command line option: -mat_use_hash_table fact
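   For example (illustrative value): -mat_use_hash_table 1.4
   A factor of 1.0 or less is replaced by the default of 1.39.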
2393 
2394 .keywords: matrix, hashtable, factor, HT
2395 
2396 .seealso: MatSetOption()
2397 @XXXXX*/
2398 
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2402 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2403 {
2404   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2405   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2406   PetscReal      atmp;
2407   PetscReal      *work,*svalues,*rvalues;
2408   PetscErrorCode ierr;
2409   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2410   PetscMPIInt    rank,size;
2411   PetscInt       *rowners_bs,dest,count,source;
2412   PetscScalar    *va;
2413   MatScalar      *ba;
2414   MPI_Status     stat;
2415 
2416   PetscFunctionBegin;
2417   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2418   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2419   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2420 
2421   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2422   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2423 
2424   bs   = A->rmap.bs;
2425   mbs  = a->mbs;
2426   Mbs  = a->Mbs;
2427   ba   = b->a;
2428   bi   = b->i;
2429   bj   = b->j;
2430 
2431   /* find ownerships */
2432   rowners_bs = A->rmap.range;
2433 
2434   /* each proc creates an array to be distributed */
2435   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2436   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2437 
2438   /* row_max for B */
2439   if (rank != size-1){
2440     for (i=0; i<mbs; i++) {
2441       ncols = bi[1] - bi[0]; bi++;
2442       brow  = bs*i;
2443       for (j=0; j<ncols; j++){
2444         bcol = bs*(*bj);
2445         for (kcol=0; kcol<bs; kcol++){
2446           col = bcol + kcol;                 /* local col index */
2447           col += rowners_bs[rank+1];      /* global col index */
2448           for (krow=0; krow<bs; krow++){
2449             atmp = PetscAbsScalar(*ba); ba++;
2450             row = brow + krow;    /* local row index */
2451             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2452             if (work[col] < atmp) work[col] = atmp;
2453           }
2454         }
2455         bj++;
2456       }
2457     }
2458 
2459     /* send values to its owners */
2460     for (dest=rank+1; dest<size; dest++){
2461       svalues = work + rowners_bs[dest];
2462       count   = rowners_bs[dest+1]-rowners_bs[dest];
2463       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2464     }
2465   }
2466 
2467   /* receive values */
2468   if (rank){
2469     rvalues = work;
2470     count   = rowners_bs[rank+1]-rowners_bs[rank];
2471     for (source=0; source<rank; source++){
2472       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2473       /* process values */
2474       for (i=0; i<count; i++){
2475         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2476       }
2477     }
2478   }
2479 
2480   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2481   ierr = PetscFree(work);CHKERRQ(ierr);
2482   PetscFunctionReturn(0);
2483 }
2484 
2485 #undef __FUNCT__
2486 #define __FUNCT__ "MatRelax_MPISBAIJ"
2487 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2488 {
2489   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2490   PetscErrorCode ierr;
2491   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2492   PetscScalar    *x,*b,*ptr,zero=0.0;
2493   Vec            bb1;
2494 
2495   PetscFunctionBegin;
2496   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2497   if (bs > 1)
2498     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2499 
2500   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2501     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2502       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2503       its--;
2504     }
2505 
2506     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2507     while (its--){
2508 
      /* lower triangular part: slvec0b = B^T*xx (slvec0 is negated below) */
2510       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2511 
2512       /* copy xx into slvec0a */
2513       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2514       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2516       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2517 
2518       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2519 
2520       /* copy bb into slvec1a */
2521       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2522       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2524       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2525 
2526       /* set slvec1b = 0 */
2527       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2528 
2529       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2530       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2531       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2532       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2533 
      /* upper triangular part: bb1 = slvec1a + B*slvec1b */
2535       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2536 
2537       /* local diagonal sweep */
2538       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2539     }
2540     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2541   } else {
2542     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2543   }
2544   PetscFunctionReturn(0);
2545 }
2546 
2547 #undef __FUNCT__
2548 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2549 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2550 {
2551   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2552   PetscErrorCode ierr;
2553   Vec            lvec1,bb1;
2554 
2555   PetscFunctionBegin;
2556   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2557   if (matin->rmap.bs > 1)
2558     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2559 
2560   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2561     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2562       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2563       its--;
2564     }
2565 
2566     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2567     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2568     while (its--){
2569       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2570 
      /* lower triangular part: bb1 = bb - B^T*xx */
2572       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2573       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2574 
2575       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2576       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2577       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2578 
      /* upper triangular part: bb1 = bb1 - B*x */
2580       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2581       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2582 
2583       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2584 
2585       /* diagonal sweep */
2586       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2587     }
2588     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2589     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2590   } else {
2591     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2592   }
2593   PetscFunctionReturn(0);
2594 }
2595 
2596