xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a77337e4d7d5143cd577a9cca2c72dcc9694336d)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
103       N = nrow++ - 1;  \
104       /* shift up all the later entries in this row */ \
105       for (ii=N; ii>=_i; ii--) { \
106         rp[ii+1] = rp[ii]; \
107         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
108       } \
109       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
110       rp[_i]                      = bcol;  \
111       ap[bs2*_i + bs*cidx + ridx] = value;  \
112       a_noinsert:; \
113     ailen[brow] = nrow; \
114 }
115 #ifndef MatSetValues_SeqSBAIJ_B_Private
116 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
117 { \
118     brow = row/bs;  \
119     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
120     rmax = bimax[brow]; nrow = bilen[brow]; \
121       bcol = col/bs; \
122       ridx = row % bs; cidx = col % bs; \
123       low = 0; high = nrow; \
124       while (high-low > 3) { \
125         t = (low+high)/2; \
126         if (rp[t] > bcol) high = t; \
127         else              low  = t; \
128       } \
129       for (_i=low; _i<high; _i++) { \
130         if (rp[_i] > bcol) break; \
131         if (rp[_i] == bcol) { \
132           bap  = ap +  bs2*_i + bs*cidx + ridx; \
133           if (addv == ADD_VALUES) *bap += value;  \
134           else                    *bap  = value;  \
135           goto b_noinsert; \
136         } \
137       } \
138       if (b->nonew == 1) goto b_noinsert; \
139       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
140       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       b_noinsert:; \
151     bilen[brow] = nrow; \
152 }
153 #endif
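/*
   Both macros above implement the same insertion scheme: narrow the search
   range with a truncated binary search on the block columns, finish with a
   linear scan, update the existing bs x bs block in place if the column is
   already present, and otherwise shift the tail of the row up by one block
   and insert. A minimal standalone sketch of just the search step follows
   (hypothetical helper, compiled out; not part of PETSc).
*/
#if 0
static PetscInt FindBlockCol(const PetscInt *cols,PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;
  while (high - low > 3) {        /* binary search on the block columns */
    t = (low + high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {      /* short ranges: finish with a linear scan */
    if (cols[i] >= bcol) break;
  }
  return i;                       /* slot holding bcol, or insertion point */
}
#endif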
154 
155 #if defined(PETSC_USE_MAT_SINGLE)
156 #undef __FUNCT__
157 #define __FUNCT__ "MatSetValues_MPISBAIJ"
158 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
159 {
160   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
161   PetscErrorCode ierr;
162   PetscInt       i,N = m*n;
163   MatScalar      *vsingle;
164 
165   PetscFunctionBegin;
166   if (N > b->setvalueslen) {
167     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
168     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
169     b->setvalueslen  = N;
170   }
171   vsingle = b->setvaluescopy;
172 
173   for (i=0; i<N; i++) {
174     vsingle[i] = v[i];
175   }
176   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
182 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
183 {
184   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
185   PetscErrorCode ierr;
186   PetscInt       i,N = m*n*b->bs2;
187   MatScalar      *vsingle;
188 
189   PetscFunctionBegin;
190   if (N > b->setvalueslen) {
191     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
192     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
193     b->setvalueslen  = N;
194   }
195   vsingle = b->setvaluescopy;
196   for (i=0; i<N; i++) {
197     vsingle[i] = v[i];
198   }
199   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 #endif
203 
204 /* Only add/insert a(i,j) with i<=j (blocks).
205    Any a(i,j) with i>j input by the user is ignored.
206 */
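/*
   Caller-side sketch of the rule above (hypothetical usage with bs = 1,
   assuming the MatSetValue() convenience wrapper; compiled out): only the
   upper triangular entry is stored, the lower one is dropped when
   MAT_IGNORE_LOWER_TRIANGULAR is set and raises an error otherwise.
*/
#if 0
static PetscErrorCode ExampleUpperTriangularOnly(Mat mat)
{
  PetscErrorCode ierr;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  ierr = MatSetValue(mat,0,1,2.0,INSERT_VALUES);CHKERRQ(ierr); /* kept */
  ierr = MatSetValue(mat,1,0,3.0,INSERT_VALUES);CHKERRQ(ierr); /* dropped */
  return 0;
}
#endif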
207 #undef __FUNCT__
208 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
209 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
210 {
211   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
212   MatScalar      value;
213   PetscTruth     roworiented = baij->roworiented;
214   PetscErrorCode ierr;
215   PetscInt       i,j,row,col;
216   PetscInt       rstart_orig=mat->rmap.rstart;
217   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
218   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
219 
220   /* Some Variables required in the macro */
221   Mat            A = baij->A;
222   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
223   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224   MatScalar      *aa=a->a;
225 
226   Mat            B = baij->B;
227   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
228   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229   MatScalar     *ba=b->a;
230 
231   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
232   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
233   MatScalar     *ap,*bap;
234 
235   /* for stash */
236   PetscInt      n_loc, *in_loc = PETSC_NULL;
237   MatScalar     *v_loc = PETSC_NULL;
238 
239   PetscFunctionBegin;
240 
241   if (!baij->donotstash){
242     if (n > baij->n_loc) {
243       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
244       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
245       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
246       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
247       baij->n_loc = n;
248     }
249     in_loc = baij->in_loc;
250     v_loc  = baij->v_loc;
251   }
252 
253   for (i=0; i<m; i++) {
254     if (im[i] < 0) continue;
255 #if defined(PETSC_USE_DEBUG)
256     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
257 #endif
258     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259       row = im[i] - rstart_orig;              /* local row index */
260       for (j=0; j<n; j++) {
261         if (im[i]/bs > in[j]/bs){
262           if (a->ignore_ltriangular){
263             continue;    /* ignore lower triangular blocks */
264           } else {
265             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
266           }
267         }
268         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
269           col = in[j] - cstart_orig;          /* local col index */
270           brow = row/bs; bcol = col/bs;
271           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
272           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
274           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
275         } else if (in[j] < 0) continue;
276 #if defined(PETSC_USE_DEBUG)
277         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
278 #endif
279         else {  /* off-diag entry (B) */
280           if (mat->was_assembled) {
281             if (!baij->colmap) {
282               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
283             }
284 #if defined (PETSC_USE_CTABLE)
285             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
286             col  = col - 1;
287 #else
288             col = baij->colmap[in[j]/bs] - 1;
289 #endif
290             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
291               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
292               col =  in[j];
293               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
294               B = baij->B;
295               b = (Mat_SeqBAIJ*)(B)->data;
296               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
297               ba=b->a;
298             } else col += in[j]%bs;
299           } else col = in[j];
300           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
301           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
302           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
303         }
304       }
305     } else {  /* off processor entry */
306       if (!baij->donotstash) {
307         n_loc = 0;
308         for (j=0; j<n; j++){
309           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
310           in_loc[n_loc] = in[j];
311           if (roworiented) {
312             v_loc[n_loc] = v[i*n+j];
313           } else {
314             v_loc[n_loc] = v[j*m+i];
315           }
316           n_loc++;
317         }
318         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
319       }
320     }
321   }
322   PetscFunctionReturn(0);
323 }
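/*
   Entries for off-process rows are not communicated here: they are packed
   into mat->stash above and exchanged during MatAssemblyBegin()/End(),
   which replays them through this same routine on the owning process.
*/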
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
327 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
328 {
329   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
330   const MatScalar *value;
331   MatScalar       *barray=baij->barray;
332   PetscTruth      roworiented = baij->roworiented;
333   PetscErrorCode  ierr;
334   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
335   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
336   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
337 
338   PetscFunctionBegin;
339   if (!barray) {
340     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
341     baij->barray = barray;
342   }
343 
344   if (roworiented) {
345     stepval = (n-1)*bs;
346   } else {
347     stepval = (m-1)*bs;
348   }
349   for (i=0; i<m; i++) {
350     if (im[i] < 0) continue;
351 #if defined(PETSC_USE_DEBUG)
352     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
353 #endif
354     if (im[i] >= rstart && im[i] < rend) {
355       row = im[i] - rstart;
356       for (j=0; j<n; j++) {
357         /* If v holds just one contiguous bs x bs block (a single block column when row-oriented, a single block row otherwise), a copy is not required */
358         if ((roworiented) && (n == 1)) {
359           barray = (MatScalar*) v + i*bs2;
360         } else if((!roworiented) && (m == 1)) {
361           barray = (MatScalar*) v + j*bs2;
362         } else { /* Here a copy is required */
363           if (roworiented) {
364             value = v + i*(stepval+bs)*bs + j*bs;
365           } else {
366             value = v + j*(stepval+bs)*bs + i*bs;
367           }
368           for (ii=0; ii<bs; ii++,value+=stepval) {
369             for (jj=0; jj<bs; jj++) {
370               *barray++  = *value++;
371             }
372           }
373           barray -=bs2;
374         }
375 
376         if (in[j] >= cstart && in[j] < cend){
377           col  = in[j] - cstart;
378           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
379         }
380         else if (in[j] < 0) continue;
381 #if defined(PETSC_USE_DEBUG)
382         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
383 #endif
384         else {
385           if (mat->was_assembled) {
386             if (!baij->colmap) {
387               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388             }
389 
390 #if defined(PETSC_USE_DEBUG)
391 #if defined (PETSC_USE_CTABLE)
392             { PetscInt data;
393               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
394               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
395             }
396 #else
397             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
398 #endif
399 #endif
400 #if defined (PETSC_USE_CTABLE)
401             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
402             col  = (col - 1)/bs;
403 #else
404             col = (baij->colmap[in[j]] - 1)/bs;
405 #endif
406             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
407               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
408               col =  in[j];
409             }
410           }
411           else col = in[j];
412           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
413         }
414       }
415     } else {
416       if (!baij->donotstash) {
417         if (roworiented) {
418           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
419         } else {
420           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
421         }
422       }
423     }
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatGetValues_MPISBAIJ"
430 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
431 {
432   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
433   PetscErrorCode ierr;
434   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
435   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
436 
437   PetscFunctionBegin;
438   for (i=0; i<m; i++) {
439     if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
440     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
441     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
442       row = idxm[i] - bsrstart;
443       for (j=0; j<n; j++) {
444         if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
445         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
446         if (idxn[j] >= bscstart && idxn[j] < bscend){
447           col = idxn[j] - bscstart;
448           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
449         } else {
450           if (!baij->colmap) {
451             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
452           }
453 #if defined (PETSC_USE_CTABLE)
454           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
455           data --;
456 #else
457           data = baij->colmap[idxn[j]/bs]-1;
458 #endif
459           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
460           else {
461             col  = data + idxn[j]%bs;
462             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
463           }
464         }
465       }
466     } else {
467       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
468     }
469   }
470   PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatNorm_MPISBAIJ"
475 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
476 {
477   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
478   PetscErrorCode ierr;
479   PetscReal      sum[2],*lnorm2;
480 
481   PetscFunctionBegin;
482   if (baij->size == 1) {
483     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
484   } else {
485     if (type == NORM_FROBENIUS) {
486       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
487       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
488       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
489       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
490       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
491       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
492       *norm = sqrt(sum[0] + 2*sum[1]);
493       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
494     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
495       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
496       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
497       PetscReal    *rsum,*rsum2,vabs;
498       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
499       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
500       MatScalar    *v;
501 
502       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
503       rsum2 = rsum + mat->cmap.N;
504       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
505       /* Amat */
506       v = amat->a; jj = amat->j;
507       for (brow=0; brow<mbs; brow++) {
508         grow = bs*(rstart + brow);
509         nz = amat->i[brow+1] - amat->i[brow];
510         for (bcol=0; bcol<nz; bcol++){
511           gcol = bs*(rstart + *jj); jj++;
512           for (col=0; col<bs; col++){
513             for (row=0; row<bs; row++){
514               vabs = PetscAbsScalar(*v); v++;
515               rsum[gcol+col] += vabs;
516               /* non-diagonal block */
517               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
518             }
519           }
520         }
521       }
522       /* Bmat */
523       v = bmat->a; jj = bmat->j;
524       for (brow=0; brow<mbs; brow++) {
525         grow = bs*(rstart + brow);
526         nz = bmat->i[brow+1] - bmat->i[brow];
527         for (bcol=0; bcol<nz; bcol++){
528           gcol = bs*garray[*jj]; jj++;
529           for (col=0; col<bs; col++){
530             for (row=0; row<bs; row++){
531               vabs = PetscAbsScalar(*v); v++;
532               rsum[gcol+col] += vabs;
533               rsum[grow+row] += vabs;
534             }
535           }
536         }
537       }
538       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
539       *norm = 0.0;
540       for (col=0; col<mat->cmap.N; col++) {
541         if (rsum2[col] > *norm) *norm = rsum2[col];
542       }
543       ierr = PetscFree(rsum);CHKERRQ(ierr);
544     } else {
545       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
546     }
547   }
548   PetscFunctionReturn(0);
549 }
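/*
   Accounting note for the norms above: blocks stored in B appear twice in
   the full symmetric matrix (once as the block, once as its transpose),
   hence the factor 2 on sum[1] in the Frobenius reduction; the SeqSBAIJ
   norm of A is assumed to account for the symmetry of the local part
   itself.  For the same reason the 1- and infinity-norm loops add each
   absolute value to both a row sum (rsum[grow+row]) and a column sum
   (rsum[gcol+col]).
*/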
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
553 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
554 {
555   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
556   PetscErrorCode ierr;
557   PetscInt       nstash,reallocs;
558   InsertMode     addv;
559 
560   PetscFunctionBegin;
561   if (baij->donotstash) {
562     PetscFunctionReturn(0);
563   }
564 
565   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
566   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
567   if (addv == (ADD_VALUES|INSERT_VALUES)) {
568     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
569   }
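  /* The bitwise OR detects mixing because INSERT_VALUES and ADD_VALUES are
     distinct bit patterns (1 and 2), so their OR equals neither value alone;
     processors with no pending values contribute NOT_SET_VALUES = 0. */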
570   mat->insertmode = addv; /* in case this processor had no cache */
571 
572   ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
573   ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
574   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
575   ierr = PetscInfo2(mat,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
576   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
577   ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
583 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
584 {
585   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
586   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
589   PetscInt       *row,*col;
590   PetscTruth     other_disassembled;
591   PetscMPIInt    n;
592   PetscTruth     r1,r2,r3;
593   MatScalar      *val;
594   InsertMode     addv = mat->insertmode;
595 
596   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
597   PetscFunctionBegin;
598 
599   if (!baij->donotstash) {
600     while (1) {
601       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
602       if (!flg) break;
603 
604       for (i=0; i<n;) {
605         /* Now identify the consecutive vals belonging to the same row */
606         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
607         if (j < n) ncols = j-i;
608         else       ncols = n-i;
609         /* Now assemble all these values with a single function call */
610         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
611         i = j;
612       }
613     }
614     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
615     /* Now process the block-stash. Since the values are stashed column-oriented,
616        temporarily clear the roworiented flags, and after MatSetValues()
617        restore the original flags */
618     r1 = baij->roworiented;
619     r2 = a->roworiented;
620     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
621     baij->roworiented = PETSC_FALSE;
622     a->roworiented    = PETSC_FALSE;
623     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
624     while (1) {
625       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
626       if (!flg) break;
627 
628       for (i=0; i<n;) {
629         /* Now identify the consecutive vals belonging to the same row */
630         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
631         if (j < n) ncols = j-i;
632         else       ncols = n-i;
633         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
634         i = j;
635       }
636     }
637     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
638     baij->roworiented = r1;
639     a->roworiented    = r2;
640     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
641   }
642 
643   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
644   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
645 
646   /* determine if any processor has disassembled; if so we must
647      also disassemble ourselves, in order that we may reassemble. */
648   /*
649      if the nonzero structure of submatrix B cannot change then we know that
650      no processor disassembled, thus we can skip this stuff
651   */
652   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
653     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
654     if (mat->was_assembled && !other_disassembled) {
655       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
656     }
657   }
658 
659   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
660     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
661   }
662   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
663   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
664   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
665 
666   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
667   baij->rowvalues = 0;
668 
669   PetscFunctionReturn(0);
670 }
671 
672 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
673 #undef __FUNCT__
674 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
675 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
676 {
677   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
678   PetscErrorCode    ierr;
679   PetscInt          bs = mat->rmap.bs;
680   PetscMPIInt       size = baij->size,rank = baij->rank;
681   PetscTruth        iascii,isdraw;
682   PetscViewer       sviewer;
683   PetscViewerFormat format;
684 
685   PetscFunctionBegin;
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
687   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
688   if (iascii) {
689     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
690     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
691       MatInfo info;
692       ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
693       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
694       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
695               rank,mat->rmap.n,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
696               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
697       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
698       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
699       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
700       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
701       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
702       ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
703       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
704       PetscFunctionReturn(0);
705     } else if (format == PETSC_VIEWER_ASCII_INFO) {
706       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
707       PetscFunctionReturn(0);
708     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
709       PetscFunctionReturn(0);
710     }
711   }
712 
713   if (isdraw) {
714     PetscDraw  draw;
715     PetscTruth isnull;
716     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
717     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
718   }
719 
720   if (size == 1) {
721     ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
722     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
723   } else {
724     /* assemble the entire matrix onto first processor. */
725     Mat          A;
726     Mat_SeqSBAIJ *Aloc;
727     Mat_SeqBAIJ  *Bloc;
728     PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
729     MatScalar    *a;
730 
731     /* Should this be the same type as mat? */
732     ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
733     if (!rank) {
734       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
735     } else {
736       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
737     }
738     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
739     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
740     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
741 
742     /* copy over the A part */
743     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
744     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
745     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
746 
747     for (i=0; i<mbs; i++) {
748       rvals[0] = bs*(baij->rstartbs + i);
749       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
750       for (j=ai[i]; j<ai[i+1]; j++) {
751         col = (baij->cstartbs+aj[j])*bs;
752         for (k=0; k<bs; k++) {
753           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
754           col++; a += bs;
755         }
756       }
757     }
758     /* copy over the B part */
759     Bloc = (Mat_SeqBAIJ*)baij->B->data;
760     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
761     for (i=0; i<mbs; i++) {
762 
763       rvals[0] = bs*(baij->rstartbs + i);
764       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
765       for (j=ai[i]; j<ai[i+1]; j++) {
766         col = baij->garray[aj[j]]*bs;
767         for (k=0; k<bs; k++) {
768           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
769           col++; a += bs;
770         }
771       }
772     }
773     ierr = PetscFree(rvals);CHKERRQ(ierr);
774     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
775     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
776     /*
777        Everyone has to call to draw the matrix since the graphics waits are
778        synchronized across all processors that share the PetscDraw object
779     */
780     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
781     if (!rank) {
782       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
783       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
784     }
785     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
786     ierr = MatDestroy(A);CHKERRQ(ierr);
787   }
788   PetscFunctionReturn(0);
789 }
790 
791 #undef __FUNCT__
792 #define __FUNCT__ "MatView_MPISBAIJ"
793 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
794 {
795   PetscErrorCode ierr;
796   PetscTruth     iascii,isdraw,issocket,isbinary;
797 
798   PetscFunctionBegin;
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
800   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
801   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
802   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
803   if (iascii || isdraw || issocket || isbinary) {
804     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
805   } else {
806     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatDestroy_MPISBAIJ"
813 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
814 {
815   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
816   PetscErrorCode ierr;
817 
818   PetscFunctionBegin;
819 #if defined(PETSC_USE_LOG)
820   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
821 #endif
822   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
823   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
824   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
825   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
826 #if defined (PETSC_USE_CTABLE)
827   if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
828 #else
829   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
830 #endif
831   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
832   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
833   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
834   if (baij->slvec0) {
835     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
836     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
837   }
838   if (baij->slvec1) {
839     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
840     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
841     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
842   }
843   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
844   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
845   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
846   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
847 #if defined(PETSC_USE_MAT_SINGLE)
848   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
849 #endif
850   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
851   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
852   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
853   ierr = PetscFree(baij);CHKERRQ(ierr);
854 
855   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
858   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
859   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
860   PetscFunctionReturn(0);
861 }
862 
863 #undef __FUNCT__
864 #define __FUNCT__ "MatMult_MPISBAIJ"
865 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
866 {
867   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
868   PetscErrorCode ierr;
869   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
870   PetscScalar    *x,*from,zero=0.0;
871 
872   PetscFunctionBegin;
873   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
874   if (nt != A->cmap.n) {
875     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
876   }
877 
878   /* diagonal part */
879   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
880   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
881 
882   /* subdiagonal part */
883   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
884 
885   /* copy x into the vec slvec0 */
886   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
887   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
888 
889   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
890   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
891   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
892 
893   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
894   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
895   /* superdiagonal part */
896   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
897   PetscFunctionReturn(0);
898 }
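/*
   Sketch of the scheme used above: with D the local diagonal SBAIJ block
   and B the local off-diagonal block, the routine computes

      y = D x + B x_ghost + (remote contributions of B^T x)

   slvec0 packs [x ; B^T x]; the ADD_VALUES forward scatter accumulates the
   B^T pieces into the owning processes' slvec1a (which already holds D x)
   and fills slvec1b with the ghost values of x; the final multadd then
   supplies B x_ghost.  The lower triangle is never formed explicitly.
*/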
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
902 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
903 {
904   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
905   PetscErrorCode ierr;
906   PetscInt       nt;
907 
908   PetscFunctionBegin;
909   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
910   if (nt != A->cmap.n) {
911     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
912   }
913   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
914   if (nt != A->rmap.n) {
915     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
916   }
917 
918   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
919   /* do diagonal part */
920   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
921   /* do superdiagonal part */
922   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
923   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
924   /* do subdiagonal part */
925   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
926   ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
927   ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
928 
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
934 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
935 {
936   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
937   PetscErrorCode ierr;
938   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
939   PetscScalar    *x,*from,zero=0.0;
940 
941   PetscFunctionBegin;
942   /*
943   PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
944   PetscSynchronizedFlush(((PetscObject)A)->comm);
945   */
946   /* diagonal part */
947   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
948   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
949 
950   /* subdiagonal part */
951   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
952 
953   /* copy x into the vec slvec0 */
954   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
955   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
956   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
957   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
958 
959   ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
960   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
961   ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
962 
963   /* superdiagonal part */
964   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
965 
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
971 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
972 {
973   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
978   /* do diagonal part */
979   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
980   /* do superdiagonal part */
981   ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
982   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
983 
984   /* do subdiagonal part */
985   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
986   ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
987   ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
988 
989   PetscFunctionReturn(0);
990 }
991 
992 /*
993   This only works correctly for square matrices where the subblock A->A is the
994    diagonal block
995 */
996 #undef __FUNCT__
997 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
998 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
999 {
1000   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1001   PetscErrorCode ierr;
1002 
1003   PetscFunctionBegin;
1004   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1005   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatScale_MPISBAIJ"
1011 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1012 {
1013   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1018   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1024 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1025 {
1026   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1027   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1028   PetscErrorCode ierr;
1029   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1030   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1031   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1032 
1033   PetscFunctionBegin;
1034   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1035   mat->getrowactive = PETSC_TRUE;
1036 
1037   if (!mat->rowvalues && (idx || v)) {
1038     /*
1039         allocate enough space to hold information from the longest row.
1040     */
1041     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1042     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1043     PetscInt     max = 1,mbs = mat->mbs,tmp;
1044     for (i=0; i<mbs; i++) {
1045       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1046       if (max < tmp) { max = tmp; }
1047     }
1048     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1049     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1050   }
1051 
1052   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1053   lrow = row - brstart;  /* local row index */
1054 
1055   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1056   if (!v)   {pvA = 0; pvB = 0;}
1057   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1058   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1059   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1060   nztot = nzA + nzB;
1061 
1062   cmap  = mat->garray;
1063   if (v  || idx) {
1064     if (nztot) {
1065       /* Sort by increasing column numbers, assuming A and B already sorted */
1066       PetscInt imark = -1;
1067       if (v) {
1068         *v = v_p = mat->rowvalues;
1069         for (i=0; i<nzB; i++) {
1070           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1071           else break;
1072         }
1073         imark = i;
1074         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1075         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1076       }
1077       if (idx) {
1078         *idx = idx_p = mat->rowindices;
1079         if (imark > -1) {
1080           for (i=0; i<imark; i++) {
1081             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1082           }
1083         } else {
1084           for (i=0; i<nzB; i++) {
1085             if (cmap[cworkB[i]/bs] < cstart)
1086               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1087             else break;
1088           }
1089           imark = i;
1090         }
1091         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1092         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1093       }
1094     } else {
1095       if (idx) *idx = 0;
1096       if (v)   *v   = 0;
1097     }
1098   }
1099   *nz = nztot;
1100   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1101   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
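/*
   The merge above relies on the rows of A and B being individually sorted
   by block column: B's entries whose global block column lies below cstart
   come first, then all of A's (local, upper triangular) entries, then B's
   remaining entries, yielding a single row sorted by global column index.
*/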
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1107 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1108 {
1109   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1110 
1111   PetscFunctionBegin;
1112   if (!baij->getrowactive) {
1113     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1114   }
1115   baij->getrowactive = PETSC_FALSE;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1121 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1122 {
1123   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1124   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1125 
1126   PetscFunctionBegin;
1127   aA->getrow_utriangular = PETSC_TRUE;
1128   PetscFunctionReturn(0);
1129 }
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1132 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1133 {
1134   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1135   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1136 
1137   PetscFunctionBegin;
1138   aA->getrow_utriangular = PETSC_FALSE;
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1144 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1151   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1157 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1158 {
1159   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1164   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1170 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1171 {
1172   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1177   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1183 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1184 {
1185   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1186   Mat            A = a->A,B = a->B;
1187   PetscErrorCode ierr;
1188   PetscReal      isend[5],irecv[5];
1189 
1190   PetscFunctionBegin;
1191   info->block_size     = (PetscReal)matin->rmap.bs;
1192   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1193   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1194   isend[3] = info->memory;  isend[4] = info->mallocs;
1195   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1196   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1197   isend[3] += info->memory;  isend[4] += info->mallocs;
1198   if (flag == MAT_LOCAL) {
1199     info->nz_used      = isend[0];
1200     info->nz_allocated = isend[1];
1201     info->nz_unneeded  = isend[2];
1202     info->memory       = isend[3];
1203     info->mallocs      = isend[4];
1204   } else if (flag == MAT_GLOBAL_MAX) {
1205     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
1206     info->nz_used      = irecv[0];
1207     info->nz_allocated = irecv[1];
1208     info->nz_unneeded  = irecv[2];
1209     info->memory       = irecv[3];
1210     info->mallocs      = irecv[4];
1211   } else if (flag == MAT_GLOBAL_SUM) {
1212     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
1213     info->nz_used      = irecv[0];
1214     info->nz_allocated = irecv[1];
1215     info->nz_unneeded  = irecv[2];
1216     info->memory       = irecv[3];
1217     info->mallocs      = irecv[4];
1218   } else {
1219     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1220   }
1221   info->rows_global       = (PetscReal)matin->rmap.N;
1222   info->columns_global    = (PetscReal)matin->cmap.N;
1223   info->rows_local        = (PetscReal)matin->rmap.n;
1224   info->columns_local     = (PetscReal)matin->cmap.n;
1225   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1226   info->fill_ratio_needed = 0;
1227   info->factor_mallocs    = 0;
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1233 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1234 {
1235   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1236   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   switch (op) {
1241   case MAT_NEW_NONZERO_LOCATIONS:
1242   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1243   case MAT_KEEP_ZEROED_ROWS:
1244   case MAT_NEW_NONZERO_LOCATION_ERR:
1245     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1246     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1247     break;
1248   case MAT_ROW_ORIENTED:
1249     a->roworiented = flg;
1250     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1251     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1252     break;
1253   case MAT_NEW_DIAGONALS:
1254     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1255     break;
1256   case MAT_IGNORE_OFF_PROC_ENTRIES:
1257     a->donotstash = flg;
1258     break;
1259   case MAT_USE_HASH_TABLE:
1260     a->ht_flag = flg;
1261     break;
1262   case MAT_HERMITIAN:
1263     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_SYMMETRY_ETERNAL:
1267     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1268     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1269     break;
1270   case MAT_IGNORE_LOWER_TRIANGULAR:
1271     aA->ignore_ltriangular = flg;
1272     break;
1273   case MAT_ERROR_LOWER_TRIANGULAR:
1274     aA->ignore_ltriangular = flg;
1275     break;
1276   case MAT_GETROW_UPPERTRIANGULAR:
1277     aA->getrow_utriangular = flg;
1278     break;
1279   default:
1280     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1281   }
1282   PetscFunctionReturn(0);
1283 }
1284 
1285 #undef __FUNCT__
1286 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1287 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1288 {
1289   PetscErrorCode ierr;
1290   PetscFunctionBegin;
1291   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1292   PetscFunctionReturn(0);
1293 }
1294 
1295 #undef __FUNCT__
1296 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1297 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1298 {
1299   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1300   Mat            a=baij->A, b=baij->B;
1301   PetscErrorCode ierr;
1302   PetscInt       nv,m,n;
1303   PetscTruth     flg;
1304 
1305   PetscFunctionBegin;
1306   if (ll != rr){
1307     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1308     if (!flg)
1309       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1310   }
1311   if (!ll) PetscFunctionReturn(0);
1312 
1313   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1314   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1315 
1316   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1317   if (nv != n) SETERRQ(PETSC_ERR_ARG_SIZ,"Scaling vector has non-conforming local size");
1318 
1319   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1320 
1321   /* left diagonalscale the off-diagonal part */
1322   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1323 
1324   /* scale the diagonal part */
1325   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1326 
1327   /* right diagonalscale the off-diagonal part */
1328   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1329   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
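/*
   Note on the restriction above: for the symmetric format only a congruence
   scaling M <- diag(d) M diag(d), i.e. ll == rr, keeps the matrix
   symmetric, which is why distinct left and right scaling vectors are
   rejected.
*/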
1332 
1333 #undef __FUNCT__
1334 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1335 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1336 {
1337   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1338   PetscErrorCode ierr;
1339 
1340   PetscFunctionBegin;
1341   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1342   PetscFunctionReturn(0);
1343 }
1344 
1345 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatEqual_MPISBAIJ"
1349 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1350 {
1351   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1352   Mat            a,b,c,d;
1353   PetscTruth     flg;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   a = matA->A; b = matA->B;
1358   c = matB->A; d = matB->B;
1359 
1360   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1361   if (flg) {
1362     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1363   }
1364   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr); /* equal only if every process finds its local parts equal */
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatCopy_MPISBAIJ"
1370 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1371 {
1372   PetscErrorCode ierr;
1373   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1374   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1375 
1376   PetscFunctionBegin;
1377   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1378   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1379     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1380     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1381     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1382   } else {
1383     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1384     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1385   }
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1391 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1392 {
1393   PetscErrorCode ierr;
1394 
1395   PetscFunctionBegin;
1396   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1397   PetscFunctionReturn(0);
1398 }
1399 
1400 #include "petscblaslapack.h"
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1403 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1404 {
1405   PetscErrorCode ierr;
1406   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1407   PetscBLASInt   bnz,one=1;
1408   Mat_SeqSBAIJ   *xa,*ya;
1409   Mat_SeqBAIJ    *xb,*yb;
1410 
1411   PetscFunctionBegin;
1412   if (str == SAME_NONZERO_PATTERN) {
1413     PetscScalar alpha = a;
1414     xa = (Mat_SeqSBAIJ *)xx->A->data;
1415     ya = (Mat_SeqSBAIJ *)yy->A->data;
1416     bnz = PetscBLASIntCast(xa->nz);
1417     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1418     xb = (Mat_SeqBAIJ *)xx->B->data;
1419     yb = (Mat_SeqBAIJ *)yy->B->data;
1420     bnz = PetscBLASIntCast(xb->nz);
1421     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1422   } else {
1423     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1424     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1425     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1426   }
1427   PetscFunctionReturn(0);
1428 }
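
/*
   Usage sketch: callers who know X and Y share the same block nonzero
   pattern can pass SAME_NONZERO_PATTERN so the routine above reduces
   Y = a*X + Y to two BLAS axpy calls on the stored value arrays:

      ierr = MatAXPY(Y,alpha,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
*/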
1429 
1430 #undef __FUNCT__
1431 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1432 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1433 {
1434   PetscErrorCode ierr;
1435   PetscInt       i;
1436   PetscTruth     flg;
1437 
1438   PetscFunctionBegin;
1439   for (i=0; i<n; i++) {
1440     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1441     if (!flg) {
1442       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1443     }
1444   }
1445   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1446   PetscFunctionReturn(0);
1447 }
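
/*
   Usage sketch: the same index set must be passed for rows and columns so
   that each extracted submatrix stays symmetric; e.g. for the leading
   principal submatrix of local size nloc (nloc is a placeholder):

      IS  is;
      Mat *submat;
      ierr = ISCreateStride(PETSC_COMM_SELF,nloc,0,1,&is);CHKERRQ(ierr);
      ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/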
1448 
1449 
1450 /* -------------------------------------------------------------------*/
1451 static struct _MatOps MatOps_Values = {
1452        MatSetValues_MPISBAIJ,
1453        MatGetRow_MPISBAIJ,
1454        MatRestoreRow_MPISBAIJ,
1455        MatMult_MPISBAIJ,
1456 /* 4*/ MatMultAdd_MPISBAIJ,
1457        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1458        MatMultAdd_MPISBAIJ,
1459        0,
1460        0,
1461        0,
1462 /*10*/ 0,
1463        0,
1464        0,
1465        MatRelax_MPISBAIJ,
1466        MatTranspose_MPISBAIJ,
1467 /*15*/ MatGetInfo_MPISBAIJ,
1468        MatEqual_MPISBAIJ,
1469        MatGetDiagonal_MPISBAIJ,
1470        MatDiagonalScale_MPISBAIJ,
1471        MatNorm_MPISBAIJ,
1472 /*20*/ MatAssemblyBegin_MPISBAIJ,
1473        MatAssemblyEnd_MPISBAIJ,
1474        0,
1475        MatSetOption_MPISBAIJ,
1476        MatZeroEntries_MPISBAIJ,
1477 /*25*/ 0,
1478        0,
1479        0,
1480        0,
1481        0,
1482 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1483        0,
1484        0,
1485        0,
1486        0,
1487 /*35*/ MatDuplicate_MPISBAIJ,
1488        0,
1489        0,
1490        0,
1491        0,
1492 /*40*/ MatAXPY_MPISBAIJ,
1493        MatGetSubMatrices_MPISBAIJ,
1494        MatIncreaseOverlap_MPISBAIJ,
1495        MatGetValues_MPISBAIJ,
1496        MatCopy_MPISBAIJ,
1497 /*45*/ 0,
1498        MatScale_MPISBAIJ,
1499        0,
1500        0,
1501        0,
1502 /*50*/ 0,
1503        0,
1504        0,
1505        0,
1506        0,
1507 /*55*/ 0,
1508        0,
1509        MatSetUnfactored_MPISBAIJ,
1510        0,
1511        MatSetValuesBlocked_MPISBAIJ,
1512 /*60*/ 0,
1513        0,
1514        0,
1515        0,
1516        0,
1517 /*65*/ 0,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*70*/ MatGetRowMaxAbs_MPISBAIJ,
1523        0,
1524        0,
1525        0,
1526        0,
1527 /*75*/ 0,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*80*/ 0,
1533        0,
1534        0,
1535        0,
1536        MatLoad_MPISBAIJ,
1537 /*85*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*90*/ 0,
1543        0,
1544        0,
1545        0,
1546        0,
1547 /*95*/ 0,
1548        0,
1549        0,
1550        0,
1551        0,
1552 /*100*/0,
1553        0,
1554        0,
1555        0,
1556        0,
1557 /*105*/0,
1558        MatRealPart_MPISBAIJ,
1559        MatImaginaryPart_MPISBAIJ,
1560        MatGetRowUpperTriangular_MPISBAIJ,
1561        MatRestoreRowUpperTriangular_MPISBAIJ
1562 };
1563 
1564 
1565 EXTERN_C_BEGIN
1566 #undef __FUNCT__
1567 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1568 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1569 {
1570   PetscFunctionBegin;
1571   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1572   *iscopy = PETSC_FALSE;
1573   PetscFunctionReturn(0);
1574 }
1575 EXTERN_C_END
1576 
1577 EXTERN_C_BEGIN
1578 #undef __FUNCT__
1579 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1580 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1581 {
1582   Mat_MPISBAIJ   *b;
1583   PetscErrorCode ierr;
1584   PetscInt       i,mbs,Mbs;
1585 
1586   PetscFunctionBegin;
1587   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1588     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
1589   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1590 
1591   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1592   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1593   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1594   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1595   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1596 
1597   B->rmap.bs = B->cmap.bs = bs;
1598   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1599   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1600 
1601   if (d_nnz) {
1602     for (i=0; i<B->rmap.n/bs; i++) {
1603       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1604     }
1605   }
1606   if (o_nnz) {
1607     for (i=0; i<B->rmap.n/bs; i++) {
1608       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1609     }
1610   }
1611   B->preallocated = PETSC_TRUE;
1612 
1613   b   = (Mat_MPISBAIJ*)B->data;
1614   mbs = B->rmap.n/bs;
1615   Mbs = B->rmap.N/bs;
1616   if (mbs*bs != B->rmap.n) {
1617     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
1618   }
1619 
1620   B->rmap.bs  = bs;
1621   b->bs2 = bs*bs;
1622   b->mbs = mbs;
1623   b->nbs = mbs;
1624   b->Mbs = Mbs;
1625   b->Nbs = Mbs;
1626 
1627   for (i=0; i<=b->size; i++) {
1628     b->rangebs[i] = B->rmap.range[i]/bs;
1629   }
1630   b->rstartbs = B->rmap.rstart/bs;
1631   b->rendbs   = B->rmap.rend/bs;
1632 
1633   b->cstartbs = B->cmap.rstart/bs;
1634   b->cendbs   = B->cmap.rend/bs;
1635 
1636   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1637   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1638   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1639   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1640   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1641 
1642   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1643   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1644   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1645   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1646   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1647 
1648   /* build cache for off array entries formed */
1649   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1650 
1651   PetscFunctionReturn(0);
1652 }
1653 EXTERN_C_END
1654 
1655 /*MC
1656    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1657    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1658 
1659    Options Database Keys:
1660 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1661 
1662   Level: beginner
1663 
1664 .seealso: MatCreateMPISBAIJ
1665 M*/
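
/*
   A minimal creation sketch following the MatCreate()/MatSetType()
   paradigm (bs, m, M, d_nz and o_nz are placeholders):

      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/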
1666 
1667 EXTERN_C_BEGIN
1668 #undef __FUNCT__
1669 #define __FUNCT__ "MatCreate_MPISBAIJ"
1670 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1671 {
1672   Mat_MPISBAIJ   *b;
1673   PetscErrorCode ierr;
1674   PetscTruth     flg;
1675 
1676   PetscFunctionBegin;
1677 
1678   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1679   B->data = (void*)b;
1680   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1681 
1682   B->ops->destroy    = MatDestroy_MPISBAIJ;
1683   B->ops->view       = MatView_MPISBAIJ;
1684   B->mapping    = 0;
1685   B->factor     = 0;
1686   B->assembled  = PETSC_FALSE;
1687 
1688   B->insertmode = NOT_SET_VALUES;
1689   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1690   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1691 
1692   /* build local table of row and column ownerships */
1693   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1694 
1695   /* build cache for off array entries formed */
1696   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1697   b->donotstash  = PETSC_FALSE;
1698   b->colmap      = PETSC_NULL;
1699   b->garray      = PETSC_NULL;
1700   b->roworiented = PETSC_TRUE;
1701 
1702 #if defined(PETSC_USE_MAT_SINGLE)
1703   /* stuff for MatSetValues_XXX in single precision */
1704   b->setvalueslen     = 0;
1705   b->setvaluescopy    = PETSC_NULL;
1706 #endif
1707 
1708   /* stuff used in block assembly */
1709   b->barray       = 0;
1710 
1711   /* stuff used for matrix vector multiply */
1712   b->lvec         = 0;
1713   b->Mvctx        = 0;
1714   b->slvec0       = 0;
1715   b->slvec0b      = 0;
1716   b->slvec1       = 0;
1717   b->slvec1a      = 0;
1718   b->slvec1b      = 0;
1719   b->sMvctx       = 0;
1720 
1721   /* stuff for MatGetRow() */
1722   b->rowindices   = 0;
1723   b->rowvalues    = 0;
1724   b->getrowactive = PETSC_FALSE;
1725 
1726   /* hash table stuff */
1727   b->ht           = 0;
1728   b->hd           = 0;
1729   b->ht_size      = 0;
1730   b->ht_flag      = PETSC_FALSE;
1731   b->ht_fact      = 0;
1732   b->ht_total_ct  = 0;
1733   b->ht_insert_ct = 0;
1734 
1735   b->in_loc       = 0;
1736   b->v_loc        = 0;
1737   b->n_loc        = 0;
1738   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1739     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1740     if (flg) {
1741       PetscReal fact = 1.39;
1742       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1743       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1744       if (fact <= 1.0) fact = 1.39;
1745       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1746       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1747     }
1748   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1749 
1750   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1751                                      "MatStoreValues_MPISBAIJ",
1752                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1753   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1754                                      "MatRetrieveValues_MPISBAIJ",
1755                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1756   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1757                                      "MatGetDiagonalBlock_MPISBAIJ",
1758                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1759   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1760                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1761                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1762   B->symmetric                  = PETSC_TRUE;
1763   B->structurally_symmetric     = PETSC_TRUE;
1764   B->symmetric_set              = PETSC_TRUE;
1765   B->structurally_symmetric_set = PETSC_TRUE;
1766   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1767   PetscFunctionReturn(0);
1768 }
1769 EXTERN_C_END
1770 
1771 /*MC
1772    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1773 
1774    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1775    and MATMPISBAIJ otherwise.
1776 
1777    Options Database Keys:
1778 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1779 
1780   Level: beginner
1781 
1782 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1783 M*/
1784 
1785 EXTERN_C_BEGIN
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatCreate_SBAIJ"
1788 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1789 {
1790   PetscErrorCode ierr;
1791   PetscMPIInt    size;
1792 
1793   PetscFunctionBegin;
1794   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1795   if (size == 1) {
1796     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1797   } else {
1798     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1799   }
1800   PetscFunctionReturn(0);
1801 }
1802 EXTERN_C_END
1803 
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1806 /*@C
1807    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1808    the user should preallocate the matrix storage by setting the parameters
1809    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1810    performance can be increased by more than a factor of 50.
1811 
1812    Collective on Mat
1813 
1814    Input Parameters:
1815 +  A - the matrix
1816 .  bs   - size of block
1817 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1818            submatrix  (same for all local rows)
1819 .  d_nnz - array containing the number of block nonzeros in the various block rows
1820            in the upper triangular and diagonal part of the diagonal portion of the
1821            local submatrix (possibly different for each block row) or PETSC_NULL.
1822            You must leave room for the diagonal entry even if it is zero.
1823 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1824            submatrix (same for all local rows).
1825 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1826            off-diagonal portion of the local submatrix (possibly different for
1827            each block row) or PETSC_NULL.
1828 
1829 
1830    Options Database Keys:
1831 +   -mat_no_unroll - uses code that does not unroll the loops in the
1832                      block calculations (much slower)
1833 -   -mat_block_size - size of the blocks to use
1834 
1835    Notes:
1836 
1837    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1838    then it must be used on all processors that share the object for that argument.
1839 
1840    If the *_nnz parameter is given then the *_nz parameter is ignored
1841 
1842    Storage Information:
1843    For a square global matrix we define each processor's diagonal portion
1844    to be its local rows and the corresponding columns (a square submatrix);
1845    each processor's off-diagonal portion encompasses the remainder of the
1846    local matrix (a rectangular submatrix).
1847 
1848    The user can specify preallocated storage for the diagonal part of
1849    the local submatrix with either d_nz or d_nnz (not both).  Set
1850    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1851    memory allocation.  Likewise, specify preallocated storage for the
1852    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1853 
1854    You can call MatGetInfo() to get information on how effective the preallocation was;
1855    for example the fields mallocs, nz_allocated, nz_used, nz_unneeded.
1856    You can also run with the option -info and look for messages with the string
1857    malloc in them to see if additional memory allocation was needed.
1858 
1859    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1860    the figure below we depict these three local rows and all columns (0-11).
1861 
1862 .vb
1863            0 1 2 3 4 5 6 7 8 9 10 11
1864           -------------------
1865    row 3  |  o o o d d d o o o o o o
1866    row 4  |  o o o d d d o o o o o o
1867    row 5  |  o o o d d d o o o o o o
1868           -------------------
1869 .ve
1870 
1871    Thus, any entries in the d locations are stored in the d (diagonal)
1872    submatrix, and any entries in the o locations are stored in the
1873    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1874    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1875 
1876    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1877    plus the diagonal part of the d matrix,
1878    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1879    In general, for PDE problems in which most nonzeros are near the diagonal,
1880    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1881    or you will get TERRIBLE performance; see the users' manual chapter on
1882    matrices.
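
   As a concrete reading of the figure above with bs=1: the stored upper
   triangular part of the d block gives columns {3,4,5} for row 3, {4,5}
   for row 4 and {5} for row 5, while columns 6-11 of every row land in
   the o block, so one would pass d_nnz = {3,2,1} and o_nnz = {6,6,6}.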
1883 
1884    Level: intermediate
1885 
1886 .keywords: matrix, block, aij, compressed row, sparse, parallel
1887 
1888 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1889 @*/
1890 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1891 {
1892   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1893 
1894   PetscFunctionBegin;
1895   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1896   if (f) {
1897     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1898   }
1899   PetscFunctionReturn(0);
1900 }
1901 
1902 #undef __FUNCT__
1903 #define __FUNCT__ "MatCreateMPISBAIJ"
1904 /*@C
1905    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1906    (block compressed row).  For good matrix assembly performance
1907    the user should preallocate the matrix storage by setting the parameters
1908    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1909    performance can be increased by more than a factor of 50.
1910 
1911    Collective on MPI_Comm
1912 
1913    Input Parameters:
1914 +  comm - MPI communicator
1915 .  bs   - size of block
1916 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1917            This value should be the same as the local size used in creating the
1918            y vector for the matrix-vector product y = Ax.
1919 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1920            This value should be the same as the local size used in creating the
1921            x vector for the matrix-vector product y = Ax.
1922 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1923 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1924 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1925            submatrix  (same for all local rows)
1926 .  d_nnz - array containing the number of block nonzeros in the various block rows
1927            in the upper triangular and diagonal part of the diagonal portion of the
1928            local submatrix (possibly different for each block row) or PETSC_NULL.
1929            You must leave room for the diagonal entry even if it is zero.
1930 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1931            submatrix (same for all local rows).
1932 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1933            off-diagonal portion of the local submatrix (possibly different for
1934            each block row) or PETSC_NULL.
1935 
1936    Output Parameter:
1937 .  A - the matrix
1938 
1939    Options Database Keys:
1940 +   -mat_no_unroll - uses code that does not unroll the loops in the
1941                      block calculations (much slower)
1942 .   -mat_block_size - size of the blocks to use
1943 -   -mat_mpi - use the parallel matrix data structures even on one processor
1944                (defaults to using SeqSBAIJ format on one processor)
1945 
1946    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1947    MatXXXXSetPreallocation() paradigm instead of this routine directly. This is definitely
1948    true if you plan to use the external direct solvers such as SuperLU, MUMPS or Spooles.
1949    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation]
1950 
1951    Notes:
1952    The number of rows and columns must be divisible by blocksize.
1953    This matrix type does not support complex Hermitian operations.
1954 
1955    The user MUST specify either the local or global matrix dimensions
1956    (possibly both).
1957 
1958    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1959    then it must be used on all processors that share the object for that argument.
1960 
1961    If the *_nnz parameter is given then the *_nz parameter is ignored
1962 
1963    Storage Information:
1964    For a square global matrix we define each processor's diagonal portion
1965    to be its local rows and the corresponding columns (a square submatrix);
1966    each processor's off-diagonal portion encompasses the remainder of the
1967    local matrix (a rectangular submatrix).
1968 
1969    The user can specify preallocated storage for the diagonal part of
1970    the local submatrix with either d_nz or d_nnz (not both).  Set
1971    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1972    memory allocation.  Likewise, specify preallocated storage for the
1973    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1974 
1975    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1976    the figure below we depict these three local rows and all columns (0-11).
1977 
1978 .vb
1979            0 1 2 3 4 5 6 7 8 9 10 11
1980           -------------------
1981    row 3  |  o o o d d d o o o o o o
1982    row 4  |  o o o d d d o o o o o o
1983    row 5  |  o o o d d d o o o o o o
1984           -------------------
1985 .ve
1986 
1987    Thus, any entries in the d locations are stored in the d (diagonal)
1988    submatrix, and any entries in the o locations are stored in the
1989    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1990    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1991 
1992    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1993    plus the diagonal part of the d matrix,
1994    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1995    In general, for PDE problems in which most nonzeros are near the diagonal,
1996    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1997    or you will get TERRIBLE performance; see the users' manual chapter on
1998    matrices.
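
   For instance, with constant per-row estimates (the values of bs, m, M,
   d_nz and o_nz here are placeholders) a call looks like:

.vb
      ierr = MatCreateMPISBAIJ(comm,bs,m,m,M,M,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve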
1999 
2000    Level: intermediate
2001 
2002 .keywords: matrix, block, aij, compressed row, sparse, parallel
2003 
2004 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2005 @*/
2006 
2007 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2008 {
2009   PetscErrorCode ierr;
2010   PetscMPIInt    size;
2011 
2012   PetscFunctionBegin;
2013   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2014   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2015   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2016   if (size > 1) {
2017     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2018     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2019   } else {
2020     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2021     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2029 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2030 {
2031   Mat            mat;
2032   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2033   PetscErrorCode ierr;
2034   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2035   PetscScalar    *array;
2036 
2037   PetscFunctionBegin;
2038   *newmat       = 0;
2039   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2040   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2041   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2042   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2043   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2044   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2045 
2046   mat->factor       = matin->factor;
2047   mat->preallocated = PETSC_TRUE;
2048   mat->assembled    = PETSC_TRUE;
2049   mat->insertmode   = NOT_SET_VALUES;
2050 
2051   a = (Mat_MPISBAIJ*)mat->data;
2052   a->bs2   = oldmat->bs2;
2053   a->mbs   = oldmat->mbs;
2054   a->nbs   = oldmat->nbs;
2055   a->Mbs   = oldmat->Mbs;
2056   a->Nbs   = oldmat->Nbs;
2057 
2058 
2059   a->size         = oldmat->size;
2060   a->rank         = oldmat->rank;
2061   a->donotstash   = oldmat->donotstash;
2062   a->roworiented  = oldmat->roworiented;
2063   a->rowindices   = 0;
2064   a->rowvalues    = 0;
2065   a->getrowactive = PETSC_FALSE;
2066   a->barray       = 0;
2067   a->rstartbs    = oldmat->rstartbs;
2068   a->rendbs      = oldmat->rendbs;
2069   a->cstartbs    = oldmat->cstartbs;
2070   a->cendbs      = oldmat->cendbs;
2071 
2072   /* hash table stuff */
2073   a->ht           = 0;
2074   a->hd           = 0;
2075   a->ht_size      = 0;
2076   a->ht_flag      = oldmat->ht_flag;
2077   a->ht_fact      = oldmat->ht_fact;
2078   a->ht_total_ct  = 0;
2079   a->ht_insert_ct = 0;
2080 
2081   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2082   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2083   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2084   if (oldmat->colmap) {
2085 #if defined (PETSC_USE_CTABLE)
2086     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2087 #else
2088     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2089     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2090     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2091 #endif
2092   } else a->colmap = 0;
2093 
2094   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2095     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2096     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2097     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2098   } else a->garray = 0;
2099 
2100   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2101   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2102   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2103   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2104 
2105   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2106   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2107   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2108   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2109 
2110   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2111   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2113   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2114   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2115   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2116   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2117   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2118   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2122   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2123 
2124   /* ierr = VecScatterCopy(oldmat->sMvctx,&a->sMvctx); is not written yet; instead use the lazy trick of taking a reference: */
2125   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2126   a->sMvctx = oldmat->sMvctx;
2127   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2128 
2129   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2130   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2131   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2133   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2134   *newmat = mat;
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 #include "petscsys.h"
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatLoad_MPISBAIJ"
2142 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2143 {
2144   Mat            A;
2145   PetscErrorCode ierr;
2146   PetscInt       i,nz,j,rstart,rend;
2147   PetscScalar    *vals,*buf;
2148   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2149   MPI_Status     status;
2150   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens,mmbs;
2151   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2152   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2153   PetscInt       bs=1,Mbs,mbs,extra_rows;
2154   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2155   PetscInt       dcount,kmax,k,nzcount,tmp;
2156   int            fd;
2157 
2158   PetscFunctionBegin;
2159   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2160     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2161   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2162 
2163   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2164   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2165   if (!rank) {
2166     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2167     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2168     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2169     if (header[3] < 0) {
2170       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2171     }
2172   }
2173 
2174   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2175   M = header[1]; N = header[2];
2176 
2177   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2178 
2179   /*
2180      This code adds extra rows to make sure the number of rows is
2181      divisible by the blocksize
2182   */
2183   Mbs        = M/bs;
2184   extra_rows = bs - M + bs*(Mbs);
2185   if (extra_rows == bs) extra_rows = 0;
2186   else                  Mbs++;
2187   if (extra_rows && !rank) {
2188     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2189   }
2190 
2191   /* determine ownership of all rows */
2192   mbs        = Mbs/size + ((Mbs % size) > rank);
2193   m          = mbs*bs;
2194   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2195   browners   = rowners + size + 1;
2196   mmbs       = PetscMPIIntCast(mbs);
2197   ierr       = MPI_Allgather(&mmbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2198   rowners[0] = 0;
2199   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2200   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2201   rstart = rowners[rank];
2202   rend   = rowners[rank+1];
2203 
2204   /* distribute row lengths to all processors */
2205   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2206   if (!rank) {
2207     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2208     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2209     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2210     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2211     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2212     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2213     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2214   } else {
2215     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2216   }
2217 
2218   if (!rank) {   /* procs[0] */
2219     /* calculate the number of nonzeros on each processor */
2220     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2221     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2222     for (i=0; i<size; i++) {
2223       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2224         procsnz[i] += rowlengths[j];
2225       }
2226     }
2227     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2228 
2229     /* determine max buffer needed and allocate it */
2230     maxnz = 0;
2231     for (i=0; i<size; i++) {
2232       maxnz = PetscMax(maxnz,procsnz[i]);
2233     }
2234     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2235 
2236     /* read in my part of the matrix column indices  */
2237     nz     = procsnz[0];
2238     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2239     mycols = ibuf;
2240     if (size == 1)  nz -= extra_rows;
2241     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2242     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2243 
2244     /* read in the column indices for all other procs (except the last) and ship them off */
2245     for (i=1; i<size-1; i++) {
2246       nz   = procsnz[i];
2247       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2248       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2249     }
2250     /* read in the stuff for the last proc */
2251     if (size != 1) {
2252       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2253       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2254       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2255       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2256     }
2257     ierr = PetscFree(cols);CHKERRQ(ierr);
2258   } else {  /* procs[i], i>0 */
2259     /* determine buffer space needed for message */
2260     nz = 0;
2261     for (i=0; i<m; i++) {
2262       nz += locrowlens[i];
2263     }
2264     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2265     mycols = ibuf;
2266     /* receive message of column indices*/
2267     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2268     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2269     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2270   }
2271 
2272   /* loop over local rows, determining number of off diagonal entries */
2273   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2274   odlens   = dlens + (rend-rstart);
2275   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2276   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2277   masked1  = mask    + Mbs;
2278   masked2  = masked1 + Mbs;
2279   rowcount = 0; nzcount = 0;
2280   for (i=0; i<mbs; i++) {
2281     dcount  = 0;
2282     odcount = 0;
2283     for (j=0; j<bs; j++) {
2284       kmax = locrowlens[rowcount];
2285       for (k=0; k<kmax; k++) {
2286         tmp = mycols[nzcount++]/bs; /* block col. index */
2287         if (!mask[tmp]) {
2288           mask[tmp] = 1;
2289           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2290           else masked1[dcount++] = tmp; /* entry in diag portion */
2291         }
2292       }
2293       rowcount++;
2294     }
2295 
2296     dlens[i]  = dcount;  /* d_nzz[i] */
2297     odlens[i] = odcount; /* o_nzz[i] */
2298 
2299     /* zero out the mask elements we set */
2300     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2301     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2302   }
2303 
2304   /* create our matrix */
2305   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2306   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2307   ierr = MatSetType(A,type);CHKERRQ(ierr);
2308   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2309 
2310   if (!rank) {
2311     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2312     /* read in my part of the matrix numerical values  */
2313     nz = procsnz[0];
2314     vals = buf;
2315     mycols = ibuf;
2316     if (size == 1)  nz -= extra_rows;
2317     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2318     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2319 
2320     /* insert into matrix */
2321     jj      = rstart*bs;
2322     for (i=0; i<m; i++) {
2323       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2324       mycols += locrowlens[i];
2325       vals   += locrowlens[i];
2326       jj++;
2327     }
2328 
2329     /* read in other processors (except the last one) and ship out */
2330     for (i=1; i<size-1; i++) {
2331       nz   = procsnz[i];
2332       vals = buf;
2333       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2334       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2335     }
2336     /* the last proc */
2337     if (size != 1){
2338       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 here; the extra rows are not on the disk */
2339       vals = buf;
2340       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2341       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2342       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2343     }
2344     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2345 
2346   } else {
2347     /* receive numeric values */
2348     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2349 
2350     /* receive message of values*/
2351     vals   = buf;
2352     mycols = ibuf;
2353     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2354     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2355     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2356 
2357     /* insert into matrix */
2358     jj      = rstart*bs;
2359     for (i=0; i<m; i++) {
2360       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2361       mycols += locrowlens[i];
2362       vals   += locrowlens[i];
2363       jj++;
2364     }
2365   }
2366 
2367   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2368   ierr = PetscFree(buf);CHKERRQ(ierr);
2369   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2370   ierr = PetscFree(rowners);CHKERRQ(ierr);
2371   ierr = PetscFree(dlens);CHKERRQ(ierr);
2372   ierr = PetscFree(mask);CHKERRQ(ierr);
2373   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2374   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2375   *newmat = A;
2376   PetscFunctionReturn(0);
2377 }
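
/*
   Usage sketch: this routine is reached through MatLoad() with a binary
   viewer; a typical caller ("matrix.dat" is a placeholder) looks like

      PetscViewer viewer;
      Mat         A;
      ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/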
2378 
2379 #undef __FUNCT__
2380 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2381 /*XXXXX@
2382    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2383 
2384    Input Parameters:
2385 .  mat  - the matrix
2386 .  fact - factor
2387 
2388    Collective on Mat
2389 
2390    Level: advanced
2391 
2392   Notes:
2393    This can also be set by the command line option: -mat_use_hash_table fact
2394 
2395 .keywords: matrix, hashtable, factor, HT
2396 
2397 .seealso: MatSetOption()
2398 @XXXXX*/
2399 
2400 
2401 #undef __FUNCT__
2402 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
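/*
   Only the upper triangular blocks of each row are stored, so an entry of
   the off-diagonal part B at (row i, col j) also stands for (row j, col i).
   Each process therefore records in work[] the column maxima its stored
   entries imply for rows owned by higher-ranked processes, then ships those
   partial maxima forward so that every row maximum comes out complete.
*/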
2403 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2404 {
2405   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2406   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2407   PetscReal      atmp;
2408   PetscReal      *work,*svalues,*rvalues;
2409   PetscErrorCode ierr;
2410   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2411   PetscMPIInt    rank,size;
2412   PetscInt       *rowners_bs,dest,count,source;
2413   PetscScalar    *va;
2414   MatScalar      *ba;
2415   MPI_Status     stat;
2416 
2417   PetscFunctionBegin;
2418   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2419   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2420   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2421 
2422   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2423   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2424 
2425   bs   = A->rmap.bs;
2426   mbs  = a->mbs;
2427   Mbs  = a->Mbs;
2428   ba   = b->a;
2429   bi   = b->i;
2430   bj   = b->j;
2431 
2432   /* find ownerships */
2433   rowners_bs = A->rmap.range;
2434 
2435   /* each proc creates an array to be distributed */
2436   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2437   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2438 
2439   /* row maxima from the off-diagonal part B, also accumulating the implied column maxima in work[] */
2440   if (rank != size-1){
2441     for (i=0; i<mbs; i++) {
2442       ncols = bi[1] - bi[0]; bi++;
2443       brow  = bs*i;
2444       for (j=0; j<ncols; j++){
2445         bcol = bs*(*bj);
2446         for (kcol=0; kcol<bs; kcol++){
2447           col = bcol + kcol;                 /* local col index */
2448           col += rowners_bs[rank+1];      /* global col index */
2449           for (krow=0; krow<bs; krow++){
2450             atmp = PetscAbsScalar(*ba); ba++;
2451             row = brow + krow;    /* local row index */
2452             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2453             if (work[col] < atmp) work[col] = atmp;
2454           }
2455         }
2456         bj++;
2457       }
2458     }
2459 
2460     /* send the accumulated column maxima to their owning (higher-ranked) processes */
2461     for (dest=rank+1; dest<size; dest++){
2462       svalues = work + rowners_bs[dest];
2463       count   = rowners_bs[dest+1]-rowners_bs[dest];
2464       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2465     }
2466   }
2467 
2468   /* receive values */
2469   if (rank){
2470     rvalues = work;
2471     count   = rowners_bs[rank+1]-rowners_bs[rank];
2472     for (source=0; source<rank; source++){
2473       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2474       /* process values */
2475       for (i=0; i<count; i++){
2476         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2477       }
2478     }
2479   }
2480 
2481   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2482   ierr = PetscFree(work);CHKERRQ(ierr);
2483   PetscFunctionReturn(0);
2484 }
2485 
2486 #undef __FUNCT__
2487 #define __FUNCT__ "MatRelax_MPISBAIJ"
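/*
   SSOR for the distributed symmetric format: with the global matrix split
   as A = A_d + B + B^T, where A_d is the local symmetric diagonal block and
   B the stored (upper) off-process coupling block, each iteration in effect
   builds bb1 = bb - B*x_ghost - B^T*x_local using one forward scatter
   through the slvec0/slvec1 work vectors, then applies the sequential
   symmetric sweep of A_d to update xx.
*/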
2488 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2489 {
2490   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2491   PetscErrorCode ierr;
2492   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2493   PetscScalar    *x,*b,*ptr,zero=0.0;
2494   Vec            bb1;
2495 
2496   PetscFunctionBegin;
2497   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2498   if (bs > 1)
2499     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2500 
2501   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2502     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2503       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2504       its--;
2505     }
2506 
2507     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2508     while (its--){
2509 
2510       /* lower triangular part: slvec0b = - B^T*xx */
2511       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2512 
2513       /* copy xx into slvec0a */
2514       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2515       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2516       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* vector arrays hold PetscScalar, not MatScalar */
2517       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2518 
2519       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2520 
2521       /* copy bb into slvec1a */
2522       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2523       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2524       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr); /* vector arrays hold PetscScalar, not MatScalar */
2525       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2526 
2527       /* set slvec1b = 0 */
2528       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2529 
2530       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2531       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2532       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2533       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2534 
2535       /* upper triangular part: bb1 = bb1 - B*x */
2536       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2537 
2538       /* local diagonal sweep */
2539       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2540     }
2541     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2542   } else {
2543     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2544   }
2545   PetscFunctionReturn(0);
2546 }
2547 
2548 #undef __FUNCT__
2549 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2550 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2551 {
2552   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2553   PetscErrorCode ierr;
2554   Vec            lvec1,bb1;
2555 
2556   PetscFunctionBegin;
2557   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2558   if (matin->rmap.bs > 1)
2559     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2560 
2561   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2562     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2563       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2564       its--;
2565     }
2566 
2567     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2568     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2569     while (its--){
2570       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2571 
2572       /* lower diagonal part: bb1 = bb - B^T*xx */
2573       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2574       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2575 
2576       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2577       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2578       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2579 
2580       /* upper diagonal part: bb1 = bb1 - B*x */
2581       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2582       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2583 
2584       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2585 
2586       /* diagonal sweep */
2587       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2588     }
2589     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2590     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2591   } else {
2592     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2593   }
2594   PetscFunctionReturn(0);
2595 }
2596 
2597