xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 59580b9c76a122b390a6cbfdb7fa0e75caa232bf)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
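/*
   The following two macros insert a single scalar entry into the local "diagonal"
   block matrix A (MatSetValues_SeqSBAIJ_A_Private) or the local "off-diagonal"
   block matrix B (MatSetValues_SeqSBAIJ_B_Private).  Each macro locates the
   bs x bs block (brow,bcol) with a short bisection over the sorted block-column
   indices of the row, then adds or overwrites the scalar at offset bs*cidx+ridx
   within that block.  If the block is not yet present and new nonzeros are
   allowed, MatSeqXAIJReallocateAIJ() grows the storage and the later blocks in
   the row are shifted up to make room for the new one.
*/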
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
103       N = nrow++ - 1;  \
104       /* shift up all the later entries in this row */ \
105       for (ii=N; ii>=_i; ii--) { \
106         rp[ii+1] = rp[ii]; \
107         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
108       } \
109       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
110       rp[_i]                      = bcol;  \
111       ap[bs2*_i + bs*cidx + ridx] = value;  \
112       a_noinsert:; \
113     ailen[brow] = nrow; \
114 }
115 #ifndef MatSetValues_SeqSBAIJ_B_Private
116 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
117 { \
118     brow = row/bs;  \
119     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
120     rmax = bimax[brow]; nrow = bilen[brow]; \
121       bcol = col/bs; \
122       ridx = row % bs; cidx = col % bs; \
123       low = 0; high = nrow; \
124       while (high-low > 3) { \
125         t = (low+high)/2; \
126         if (rp[t] > bcol) high = t; \
127         else              low  = t; \
128       } \
129       for (_i=low; _i<high; _i++) { \
130         if (rp[_i] > bcol) break; \
131         if (rp[_i] == bcol) { \
132           bap  = ap +  bs2*_i + bs*cidx + ridx; \
133           if (addv == ADD_VALUES) *bap += value;  \
134           else                    *bap  = value;  \
135           goto b_noinsert; \
136         } \
137       } \
138       if (b->nonew == 1) goto b_noinsert; \
139       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
140       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       b_noinsert:; \
151     bilen[brow] = nrow; \
152 }
153 #endif
154 
155 #if defined(PETSC_USE_MAT_SINGLE)
156 #undef __FUNCT__
157 #define __FUNCT__ "MatSetValues_MPISBAIJ"
158 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
159 {
160   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
161   PetscErrorCode ierr;
162   PetscInt       i,N = m*n;
163   MatScalar      *vsingle;
164 
165   PetscFunctionBegin;
166   if (N > b->setvalueslen) {
167     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
168     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
169     b->setvalueslen  = N;
170   }
171   vsingle = b->setvaluescopy;
172 
173   for (i=0; i<N; i++) {
174     vsingle[i] = v[i];
175   }
176   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
182 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
183 {
184   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
185   PetscErrorCode ierr;
186   PetscInt       i,N = m*n*b->bs2;
187   MatScalar      *vsingle;
188 
189   PetscFunctionBegin;
190   if (N > b->setvalueslen) {
191     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
192     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
193     b->setvalueslen  = N;
194   }
195   vsingle = b->setvaluescopy;
196   for (i=0; i<N; i++) {
197     vsingle[i] = v[i];
198   }
199   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 #endif
203 
204 /* Only add/insert a(i,j) with i<=j (blocks).
205    Any a(i,j) with i>j input by the user is ignored.
206 */
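/*
   Illustrative usage sketch (not part of this source file): a caller filling an
   MPISBAIJ matrix supplies only entries with column index >= row index; if it
   cannot avoid generating lower-triangular entries, it may ask for them to be
   dropped instead of triggering an error.  Here rstart/rend come from
   MatGetOwnershipRange(), N is the global size, and value_at() is a placeholder
   for whatever produces the entries.

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr);
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=i; j<N; j++) {
         v    = value_at(i,j);
         ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/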
207 #undef __FUNCT__
208 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
209 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
210 {
211   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
212   MatScalar      value;
213   PetscTruth     roworiented = baij->roworiented;
214   PetscErrorCode ierr;
215   PetscInt       i,j,row,col;
216   PetscInt       rstart_orig=mat->rmap.rstart;
217   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
218   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
219 
220   /* Some Variables required in the macro */
221   Mat            A = baij->A;
222   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
223   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224   MatScalar      *aa=a->a;
225 
226   Mat            B = baij->B;
227   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
228   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229   MatScalar     *ba=b->a;
230 
231   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
232   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
233   MatScalar     *ap,*bap;
234 
235   /* for stash */
236   PetscInt      n_loc, *in_loc = PETSC_NULL;
237   MatScalar     *v_loc = PETSC_NULL;
238 
239   PetscFunctionBegin;
240 
241   if (!baij->donotstash){
242     if (n > baij->n_loc) {
243       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
244       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
245       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
246       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
247       baij->n_loc = n;
248     }
249     in_loc = baij->in_loc;
250     v_loc  = baij->v_loc;
251   }
252 
253   for (i=0; i<m; i++) {
254     if (im[i] < 0) continue;
255 #if defined(PETSC_USE_DEBUG)
256     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
257 #endif
258     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259       row = im[i] - rstart_orig;              /* local row index */
260       for (j=0; j<n; j++) {
261         if (im[i]/bs > in[j]/bs){
262           if (a->ignore_ltriangular){
263             continue;    /* ignore lower triangular blocks */
264           } else {
265             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore such values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
266           }
267         }
268         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
269           col = in[j] - cstart_orig;          /* local col index */
270           brow = row/bs; bcol = col/bs;
271           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
272           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
274           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
275         } else if (in[j] < 0) continue;
276 #if defined(PETSC_USE_DEBUG)
277         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
278 #endif
279         else {  /* off-diag entry (B) */
280           if (mat->was_assembled) {
281             if (!baij->colmap) {
282               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
283             }
284 #if defined (PETSC_USE_CTABLE)
285             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
286             col  = col - 1;
287 #else
288             col = baij->colmap[in[j]/bs] - 1;
289 #endif
290             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
291               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
292               col =  in[j];
293               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
294               B = baij->B;
295               b = (Mat_SeqBAIJ*)(B)->data;
296               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
297               ba=b->a;
298             } else col += in[j]%bs;
299           } else col = in[j];
300           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
301           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
302           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
303         }
304       }
305     } else {  /* off processor entry */
306       if (!baij->donotstash) {
307         n_loc = 0;
308         for (j=0; j<n; j++){
309           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
310           in_loc[n_loc] = in[j];
311           if (roworiented) {
312             v_loc[n_loc] = v[i*n+j];
313           } else {
314             v_loc[n_loc] = v[j*m+i];
315           }
316           n_loc++;
317         }
318         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
319       }
320     }
321   }
322   PetscFunctionReturn(0);
323 }
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
327 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
328 {
329   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
330   const MatScalar *value;
331   MatScalar       *barray=baij->barray;
332   PetscTruth      roworiented = baij->roworiented;
333   PetscErrorCode  ierr;
334   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
335   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
336   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
337 
338   PetscFunctionBegin;
339   if(!barray) {
340     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
341     baij->barray = barray;
342   }
343 
344   if (roworiented) {
345     stepval = (n-1)*bs;
346   } else {
347     stepval = (m-1)*bs;
348   }
349   for (i=0; i<m; i++) {
350     if (im[i] < 0) continue;
351 #if defined(PETSC_USE_DEBUG)
352     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
353 #endif
354     if (im[i] >= rstart && im[i] < rend) {
355       row = im[i] - rstart;
356       for (j=0; j<n; j++) {
357         /* If NumCol = 1 then a copy is not required */
358         if ((roworiented) && (n == 1)) {
359           barray = (MatScalar*) v + i*bs2;
360         } else if((!roworiented) && (m == 1)) {
361           barray = (MatScalar*) v + j*bs2;
362         } else { /* Here a copy is required */
363           if (roworiented) {
364             value = v + i*(stepval+bs)*bs + j*bs;
365           } else {
366             value = v + j*(stepval+bs)*bs + i*bs;
367           }
368           for (ii=0; ii<bs; ii++,value+=stepval) {
369             for (jj=0; jj<bs; jj++) {
370               *barray++  = *value++;
371             }
372           }
373           barray -=bs2;
374         }
375 
376         if (in[j] >= cstart && in[j] < cend){
377           col  = in[j] - cstart;
378           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
379         }
380         else if (in[j] < 0) continue;
381 #if defined(PETSC_USE_DEBUG)
382         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
383 #endif
384         else {
385           if (mat->was_assembled) {
386             if (!baij->colmap) {
387               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388             }
389 
390 #if defined(PETSC_USE_DEBUG)
391 #if defined (PETSC_USE_CTABLE)
392             { PetscInt data;
393               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
394               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
395             }
396 #else
397             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
398 #endif
399 #endif
400 #if defined (PETSC_USE_CTABLE)
401 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
402             col  = (col - 1)/bs;
403 #else
404             col = (baij->colmap[in[j]] - 1)/bs;
405 #endif
406             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
407               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
408               col =  in[j];
409             }
410           }
411           else col = in[j];
412           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
413         }
414       }
415     } else {
416       if (!baij->donotstash) {
417         if (roworiented) {
418           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
419         } else {
420           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
421         }
422       }
423     }
424   }
425   PetscFunctionReturn(0);
426 }
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatGetValues_MPISBAIJ"
430 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
431 {
432   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
433   PetscErrorCode ierr;
434   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
435   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
436 
437   PetscFunctionBegin;
438   for (i=0; i<m; i++) {
439     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
440     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
441     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
442       row = idxm[i] - bsrstart;
443       for (j=0; j<n; j++) {
444         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
445         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
446         if (idxn[j] >= bscstart && idxn[j] < bscend){
447           col = idxn[j] - bscstart;
448           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
449         } else {
450           if (!baij->colmap) {
451             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
452           }
453 #if defined (PETSC_USE_CTABLE)
454           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
455           data --;
456 #else
457           data = baij->colmap[idxn[j]/bs]-1;
458 #endif
459           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
460           else {
461             col  = data + idxn[j]%bs;
462             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
463           }
464         }
465       }
466     } else {
467       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
468     }
469   }
470  PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatNorm_MPISBAIJ"
475 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
476 {
477   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
478   PetscErrorCode ierr;
479   PetscReal      sum[2],*lnorm2;
480 
481   PetscFunctionBegin;
482   if (baij->size == 1) {
483     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
484   } else {
485     if (type == NORM_FROBENIUS) {
486       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
487       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
488       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
489       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
490       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
491       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
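      /* Only one triangle of the off-diagonal coupling is stored in B, so its
         squared Frobenius norm is counted twice to account for the implicit
         symmetric counterpart: ||mat||_F^2 = sum[0] + 2*sum[1] */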
492       *norm = sqrt(sum[0] + 2*sum[1]);
493       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
494     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
495       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
496       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
497       PetscReal    *rsum,*rsum2,vabs;
498       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
499       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
500       MatScalar    *v;
501 
502       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
503       rsum2 = rsum + mat->cmap.N;
504       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
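      /* The matrix is symmetric, so its 1-norm equals its infinity-norm; the loops
         below accumulate column sums, adding each stored entry to the sum of its
         own column and, for entries outside the diagonal block of their block row,
         also to the column given by the implicit transposed location */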
505       /* Amat */
506       v = amat->a; jj = amat->j;
507       for (brow=0; brow<mbs; brow++) {
508         grow = bs*(rstart + brow);
509         nz = amat->i[brow+1] - amat->i[brow];
510         for (bcol=0; bcol<nz; bcol++){
511           gcol = bs*(rstart + *jj); jj++;
512           for (col=0; col<bs; col++){
513             for (row=0; row<bs; row++){
514               vabs = PetscAbsScalar(*v); v++;
515               rsum[gcol+col] += vabs;
516               /* non-diagonal block */
517               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
518             }
519           }
520         }
521       }
522       /* Bmat */
523       v = bmat->a; jj = bmat->j;
524       for (brow=0; brow<mbs; brow++) {
525         grow = bs*(rstart + brow);
526         nz = bmat->i[brow+1] - bmat->i[brow];
527         for (bcol=0; bcol<nz; bcol++){
528           gcol = bs*garray[*jj]; jj++;
529           for (col=0; col<bs; col++){
530             for (row=0; row<bs; row++){
531               vabs = PetscAbsScalar(*v); v++;
532               rsum[gcol+col] += vabs;
533               rsum[grow+row] += vabs;
534             }
535           }
536         }
537       }
538       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
539       *norm = 0.0;
540       for (col=0; col<mat->cmap.N; col++) {
541         if (rsum2[col] > *norm) *norm = rsum2[col];
542       }
543       ierr = PetscFree(rsum);CHKERRQ(ierr);
544     } else {
545       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
546     }
547   }
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
553 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
554 {
555   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
556   PetscErrorCode ierr;
557   PetscInt       nstash,reallocs;
558   InsertMode     addv;
559 
560   PetscFunctionBegin;
561   if (baij->donotstash) {
562     PetscFunctionReturn(0);
563   }
564 
565   /* make sure all processes use the same insert mode: either INSERT_VALUES or ADD_VALUES */
566   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
567   if (addv == (ADD_VALUES|INSERT_VALUES)) {
568     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
569   }
570   mat->insertmode = addv; /* in case this processor had no cache */
571 
572   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
573   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
574   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
575   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
576   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
577   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
583 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
584 {
585   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
586   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
589   PetscInt       *row,*col,other_disassembled;
590   PetscMPIInt    n;
591   PetscTruth     r1,r2,r3;
592   MatScalar      *val;
593   InsertMode     addv = mat->insertmode;
594 
595   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
596   PetscFunctionBegin;
597 
598   if (!baij->donotstash) {
599     while (1) {
600       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
601       if (!flg) break;
602 
603       for (i=0; i<n;) {
604         /* Now identify the consecutive vals belonging to the same row */
605         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
606         if (j < n) ncols = j-i;
607         else       ncols = n-i;
608         /* Now assemble all these values with a single function call */
609         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
610         i = j;
611       }
612     }
613     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
614     /* Now process the block-stash. Since the values are stashed column-oriented,
615        temporarily clear the roworiented flags (i.e. use column-oriented insertion),
616        then restore the original flags after MatSetValues() */
617     r1 = baij->roworiented;
618     r2 = a->roworiented;
619     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
620     baij->roworiented = PETSC_FALSE;
621     a->roworiented    = PETSC_FALSE;
622     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
623     while (1) {
624       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
625       if (!flg) break;
626 
627       for (i=0; i<n;) {
628         /* Now identify the consecutive vals belonging to the same row */
629         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
630         if (j < n) ncols = j-i;
631         else       ncols = n-i;
632         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
633         i = j;
634       }
635     }
636     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
637     baij->roworiented = r1;
638     a->roworiented    = r2;
639     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
640   }
641 
642   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
643   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
644 
645   /* determine if any processor has disassembled, if so we must
646      also disassemble ourselves, so that we may reassemble. */
647   /*
648      if nonzero structure of submatrix B cannot change then we know that
649      no processor disassembled thus we can skip this stuff
650   */
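  /* mat->was_assembled is 1 on ranks that are still assembled and 0 on ranks that
     have disassembled (or were never assembled), so the MPI_PROD reduction below is
     0 exactly when at least one rank has disassembled; in that case every rank that
     is still assembled must disassemble as well so that B is rebuilt consistently */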
651   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
652     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
653     if (mat->was_assembled && !other_disassembled) {
654       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
655     }
656   }
657 
658   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
659     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
660   }
661   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
662   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
663   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
664 
665   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
666   baij->rowvalues = 0;
667 
668   PetscFunctionReturn(0);
669 }
670 
671 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
672 #undef __FUNCT__
673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
675 {
676   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
677   PetscErrorCode    ierr;
678   PetscInt          bs = mat->rmap.bs;
679   PetscMPIInt       size = baij->size,rank = baij->rank;
680   PetscTruth        iascii,isdraw;
681   PetscViewer       sviewer;
682   PetscViewerFormat format;
683 
684   PetscFunctionBegin;
685   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
687   if (iascii) {
688     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
689     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
690       MatInfo info;
691       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
692       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
694               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
695               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
697       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
698       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
699       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
700       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
702       PetscFunctionReturn(0);
703     } else if (format == PETSC_VIEWER_ASCII_INFO) {
704       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
705       PetscFunctionReturn(0);
706     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
707       PetscFunctionReturn(0);
708     }
709   }
710 
711   if (isdraw) {
712     PetscDraw  draw;
713     PetscTruth isnull;
714     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
715     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
716   }
717 
718   if (size == 1) {
719     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
720     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
721   } else {
722     /* assemble the entire matrix onto first processor. */
723     Mat          A;
724     Mat_SeqSBAIJ *Aloc;
725     Mat_SeqBAIJ  *Bloc;
726     PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
727     MatScalar    *a;
728 
729     /* Should this be the same type as mat? */
730     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
733     } else {
734       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
735     }
736     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
737     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
738     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
739 
740     /* copy over the A part */
741     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
742     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
743     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
744 
745     for (i=0; i<mbs; i++) {
746       rvals[0] = bs*(baij->rstartbs + i);
747       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
748       for (j=ai[i]; j<ai[i+1]; j++) {
749         col = (baij->cstartbs+aj[j])*bs;
750         for (k=0; k<bs; k++) {
751           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
752           col++; a += bs;
753         }
754       }
755     }
756     /* copy over the B part */
757     Bloc = (Mat_SeqBAIJ*)baij->B->data;
758     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
759     for (i=0; i<mbs; i++) {
760 
761       rvals[0] = bs*(baij->rstartbs + i);
762       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
763       for (j=ai[i]; j<ai[i+1]; j++) {
764         col = baij->garray[aj[j]]*bs;
765         for (k=0; k<bs; k++) {
766           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
767           col++; a += bs;
768         }
769       }
770     }
771     ierr = PetscFree(rvals);CHKERRQ(ierr);
772     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
773     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
774     /*
775        Every process has to participate in this call to draw the matrix, since the graphics waits are
776        synchronized across all processes that share the PetscDraw object
777     */
778     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
779     if (!rank) {
780       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
781       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
782     }
783     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
784     ierr = MatDestroy(A);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 #undef __FUNCT__
790 #define __FUNCT__ "MatView_MPISBAIJ"
791 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
792 {
793   PetscErrorCode ierr;
794   PetscTruth     iascii,isdraw,issocket,isbinary;
795 
796   PetscFunctionBegin;
797   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
798   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
800   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
801   if (iascii || isdraw || issocket || isbinary) {
802     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
803   } else {
804     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatDestroy_MPISBAIJ"
811 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
812 {
813   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817 #if defined(PETSC_USE_LOG)
818   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
819 #endif
820   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
821   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
822   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
823   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
824 #if defined (PETSC_USE_CTABLE)
825   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
826 #else
827   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
828 #endif
829   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
830   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
831   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
832   if (baij->slvec0) {
833     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
834     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
835   }
836   if (baij->slvec1) {
837     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
838     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
839     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
840   }
841   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
842   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
843   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
844   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
845 #if defined(PETSC_USE_MAT_SINGLE)
846   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
847 #endif
848   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
849   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
850   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
851   ierr = PetscFree(baij);CHKERRQ(ierr);
852 
853   ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
857   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
858   PetscFunctionReturn(0);
859 }
860 
861 #undef __FUNCT__
862 #define __FUNCT__ "MatMult_MPISBAIJ"
863 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
864 {
865   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
866   PetscErrorCode ierr;
867   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
868   PetscScalar    *x,*from,zero=0.0;
869 
870   PetscFunctionBegin;
871   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
872   if (nt != A->cmap.n) {
873     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
874   }
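  /* y = (A + B + B^T) x is formed with a single scatter: slvec0 is packed as
     [x ; B^T x], the ADD_VALUES forward scatter both delivers the ghost values of x
     into slvec1b and accumulates the remote B^T x (subdiagonal) contributions into
     slvec1a, which already holds A x; the final multadd with B then adds the
     superdiagonal part B*slvec1b and writes the result into yy */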
875 
876   /* diagonal part */
877   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
878   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
879 
880   /* subdiagonal part */
881   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
882   CHKMEMQ;
883   /* copy x into the vec slvec0 */
884   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
885   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
886   CHKMEMQ;
887   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
888   CHKMEMQ;
889   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
890 
891   CHKMEMQ;
892   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
893   CHKMEMQ;
894   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
895   CHKMEMQ;
896   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
897     CHKMEMQ;
898   /* superdiagonal part */
899   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
900     CHKMEMQ;
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
906 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
907 {
908   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
909   PetscErrorCode ierr;
910   PetscInt       nt;
911 
912   PetscFunctionBegin;
913   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
914   if (nt != A->cmap.n) {
915     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
916   }
917   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
918   if (nt != A->rmap.N) {
919     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
920   }
921 
922   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
923   /* do diagonal part */
924   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
925   /* do superdiagonal part */
926   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
927   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
928   /* do subdiagonal part */
929   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
930   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
931   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
932 
933   PetscFunctionReturn(0);
934 }
935 
936 #undef __FUNCT__
937 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
938 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
939 {
940   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
941   PetscErrorCode ierr;
942   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
943   PetscScalar    *x,*from,zero=0.0;
944 
945   PetscFunctionBegin;
946   /*
947   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
948   PetscSynchronizedFlush(A->comm);
949   */
950   /* diagonal part */
951   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
952   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
953 
954   /* subdiagonal part */
955   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
956 
957   /* copy x into the vec slvec0 */
958   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
959   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
960   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
961   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
962 
963   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
964   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
965   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
966 
967   /* superdiagonal part */
968   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
969 
970   PetscFunctionReturn(0);
971 }
972 
973 #undef __FUNCT__
974 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
975 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
976 {
977   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
978   PetscErrorCode ierr;
979 
980   PetscFunctionBegin;
981   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
982   /* do diagonal part */
983   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
984   /* do superdiagonal part */
985   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
986   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
987 
988   /* do subdiagonal part */
989   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
990   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
991   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
992 
993   PetscFunctionReturn(0);
994 }
995 
996 /*
997   This only works correctly for square matrices where the subblock A->A is the
998    diagonal block
999 */
1000 #undef __FUNCT__
1001 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1002 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1003 {
1004   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1005   PetscErrorCode ierr;
1006 
1007   PetscFunctionBegin;
1008   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1009   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1010   PetscFunctionReturn(0);
1011 }
1012 
1013 #undef __FUNCT__
1014 #define __FUNCT__ "MatScale_MPISBAIJ"
1015 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1016 {
1017   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1018   PetscErrorCode ierr;
1019 
1020   PetscFunctionBegin;
1021   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1022   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1023   PetscFunctionReturn(0);
1024 }
1025 
1026 #undef __FUNCT__
1027 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1028 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1029 {
1030   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1031   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1032   PetscErrorCode ierr;
1033   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1034   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1035   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1036 
1037   PetscFunctionBegin;
1038   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1039   mat->getrowactive = PETSC_TRUE;
1040 
1041   if (!mat->rowvalues && (idx || v)) {
1042     /*
1043         allocate enough space to hold information from the longest row.
1044     */
1045     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1046     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1047     PetscInt     max = 1,mbs = mat->mbs,tmp;
1048     for (i=0; i<mbs; i++) {
1049       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1050       if (max < tmp) { max = tmp; }
1051     }
1052     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1053     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1054   }
1055 
1056   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1057   lrow = row - brstart;  /* local row index */
1058 
1059   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1060   if (!v)   {pvA = 0; pvB = 0;}
1061   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1062   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1063   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1064   nztot = nzA + nzB;
1065 
1066   cmap  = mat->garray;
1067   if (v  || idx) {
1068     if (nztot) {
1069       /* Sort by increasing column numbers, assuming A and B already sorted */
1070       PetscInt imark = -1;
1071       if (v) {
1072         *v = v_p = mat->rowvalues;
1073         for (i=0; i<nzB; i++) {
1074           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1075           else break;
1076         }
1077         imark = i;
1078         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1079         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1080       }
1081       if (idx) {
1082         *idx = idx_p = mat->rowindices;
1083         if (imark > -1) {
1084           for (i=0; i<imark; i++) {
1085             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1086           }
1087         } else {
1088           for (i=0; i<nzB; i++) {
1089             if (cmap[cworkB[i]/bs] < cstart)
1090               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1091             else break;
1092           }
1093           imark = i;
1094         }
1095         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1096         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1097       }
1098     } else {
1099       if (idx) *idx = 0;
1100       if (v)   *v   = 0;
1101     }
1102   }
1103   *nz = nztot;
1104   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1105   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1111 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1112 {
1113   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1114 
1115   PetscFunctionBegin;
1116   if (!baij->getrowactive) {
1117     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1118   }
1119   baij->getrowactive = PETSC_FALSE;
1120   PetscFunctionReturn(0);
1121 }
1122 
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1125 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1126 {
1127   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1128   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1129 
1130   PetscFunctionBegin;
1131   aA->getrow_utriangular = PETSC_TRUE;
1132   PetscFunctionReturn(0);
1133 }
1134 #undef __FUNCT__
1135 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1136 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1137 {
1138   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1139   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1140 
1141   PetscFunctionBegin;
1142   aA->getrow_utriangular = PETSC_FALSE;
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1148 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1149 {
1150   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1151   PetscErrorCode ierr;
1152 
1153   PetscFunctionBegin;
1154   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1155   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1161 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1162 {
1163   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1164   PetscErrorCode ierr;
1165 
1166   PetscFunctionBegin;
1167   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1168   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1169   PetscFunctionReturn(0);
1170 }
1171 
1172 #undef __FUNCT__
1173 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1174 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1175 {
1176   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1177   PetscErrorCode ierr;
1178 
1179   PetscFunctionBegin;
1180   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1181   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1187 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1188 {
1189   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1190   Mat            A = a->A,B = a->B;
1191   PetscErrorCode ierr;
1192   PetscReal      isend[5],irecv[5];
1193 
1194   PetscFunctionBegin;
1195   info->block_size     = (PetscReal)matin->rmap.bs;
1196   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1197   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1198   isend[3] = info->memory;  isend[4] = info->mallocs;
1199   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1200   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1201   isend[3] += info->memory;  isend[4] += info->mallocs;
1202   if (flag == MAT_LOCAL) {
1203     info->nz_used      = isend[0];
1204     info->nz_allocated = isend[1];
1205     info->nz_unneeded  = isend[2];
1206     info->memory       = isend[3];
1207     info->mallocs      = isend[4];
1208   } else if (flag == MAT_GLOBAL_MAX) {
1209     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1210     info->nz_used      = irecv[0];
1211     info->nz_allocated = irecv[1];
1212     info->nz_unneeded  = irecv[2];
1213     info->memory       = irecv[3];
1214     info->mallocs      = irecv[4];
1215   } else if (flag == MAT_GLOBAL_SUM) {
1216     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1217     info->nz_used      = irecv[0];
1218     info->nz_allocated = irecv[1];
1219     info->nz_unneeded  = irecv[2];
1220     info->memory       = irecv[3];
1221     info->mallocs      = irecv[4];
1222   } else {
1223     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1224   }
1225   info->rows_global       = (PetscReal)A->rmap.N;
1226   info->columns_global    = (PetscReal)A->cmap.N;
1227   info->rows_local        = (PetscReal)A->rmap.N;
1228   info->columns_local     = (PetscReal)A->cmap.N;
1229   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1230   info->fill_ratio_needed = 0;
1231   info->factor_mallocs    = 0;
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1237 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1238 {
1239   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1240   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1241   PetscErrorCode ierr;
1242 
1243   PetscFunctionBegin;
1244   switch (op) {
1245   case MAT_NO_NEW_NONZERO_LOCATIONS:
1246   case MAT_YES_NEW_NONZERO_LOCATIONS:
1247   case MAT_COLUMNS_UNSORTED:
1248   case MAT_COLUMNS_SORTED:
1249   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1250   case MAT_KEEP_ZEROED_ROWS:
1251   case MAT_NEW_NONZERO_LOCATION_ERR:
1252     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1253     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1254     break;
1255   case MAT_ROW_ORIENTED:
1256     a->roworiented = PETSC_TRUE;
1257     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1258     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1259     break;
1260   case MAT_ROWS_SORTED:
1261   case MAT_ROWS_UNSORTED:
1262   case MAT_YES_NEW_DIAGONALS:
1263     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1264     break;
1265   case MAT_COLUMN_ORIENTED:
1266     a->roworiented = PETSC_FALSE;
1267     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1268     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1269     break;
1270   case MAT_IGNORE_OFF_PROC_ENTRIES:
1271     a->donotstash = PETSC_TRUE;
1272     break;
1273   case MAT_NO_NEW_DIAGONALS:
1274     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1275   case MAT_USE_HASH_TABLE:
1276     a->ht_flag = PETSC_TRUE;
1277     break;
1278   case MAT_NOT_SYMMETRIC:
1279   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1280   case MAT_HERMITIAN:
1281     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1282   case MAT_SYMMETRIC:
1283   case MAT_STRUCTURALLY_SYMMETRIC:
1284   case MAT_NOT_HERMITIAN:
1285   case MAT_SYMMETRY_ETERNAL:
1286   case MAT_NOT_SYMMETRY_ETERNAL:
1287     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1288     break;
1289   case MAT_IGNORE_LOWER_TRIANGULAR:
1290     aA->ignore_ltriangular = PETSC_TRUE;
1291     break;
1292   case MAT_ERROR_LOWER_TRIANGULAR:
1293     aA->ignore_ltriangular = PETSC_FALSE;
1294     break;
1295   case MAT_GETROW_UPPERTRIANGULAR:
1296     aA->getrow_utriangular = PETSC_TRUE;
1297     break;
1298   default:
1299     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1300   }
1301   PetscFunctionReturn(0);
1302 }
1303 
1304 #undef __FUNCT__
1305 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1306 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1307 {
1308   PetscErrorCode ierr;
1309   PetscFunctionBegin;
1310   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1311   PetscFunctionReturn(0);
1312 }
1313 
1314 #undef __FUNCT__
1315 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1316 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1317 {
1318   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1319   Mat            a=baij->A, b=baij->B;
1320   PetscErrorCode ierr;
1321   PetscInt       nv,m,n;
1322   PetscTruth     flg;
1323 
1324   PetscFunctionBegin;
1325   if (ll != rr){
1326     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1327     if (!flg)
1328       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1329   }
1330   if (!ll) PetscFunctionReturn(0);
1331 
1332   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1333   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1334 
1335   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1336   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1337 
1338   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1339 
1340   /* left diagonalscale the off-diagonal part */
1341   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1342 
1343   /* scale the diagonal part */
1344   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1345 
1346   /* right diagonalscale the off-diagonal part */
1347   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1348   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1354 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1355 {
1356   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1357   PetscErrorCode ierr;
1358 
1359   PetscFunctionBegin;
1360   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatEqual_MPISBAIJ"
1368 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1369 {
1370   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1371   Mat            a,b,c,d;
1372   PetscTruth     flg;
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
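  /* compare the diagonal (A) and off-diagonal (B) blocks locally, then combine the local answers across all processes */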
1376   a = matA->A; b = matA->B;
1377   c = matB->A; d = matB->B;
1378 
1379   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1380   if (flg) {
1381     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1382   }
1383   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatCopy_MPISBAIJ"
1389 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1390 {
1391   PetscErrorCode ierr;
1392   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1393   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1394 
1395   PetscFunctionBegin;
1396   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1397   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1398     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1399     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1400     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1401   } else {
1402     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1403     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1404   }
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1410 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1411 {
1412   PetscErrorCode ierr;
1413 
1414   PetscFunctionBegin;
1415   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1416   PetscFunctionReturn(0);
1417 }
1418 
1419 #include "petscblaslapack.h"
1420 #undef __FUNCT__
1421 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1422 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1423 {
1424   PetscErrorCode ierr;
1425   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1426   PetscBLASInt   bnz,one=1;
1427   Mat_SeqSBAIJ   *xa,*ya;
1428   Mat_SeqBAIJ    *xb,*yb;
1429 
1430   PetscFunctionBegin;
1431   if (str == SAME_NONZERO_PATTERN) {
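  /* when the nonzero patterns coincide, Y += a*X reduces to a BLAS axpy on the stored value arrays of the
     diagonal (A) and off-diagonal (B) blocks; otherwise fall back to the generic row-by-row routine */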
1432     PetscScalar alpha = a;
1433     xa = (Mat_SeqSBAIJ *)xx->A->data;
1434     ya = (Mat_SeqSBAIJ *)yy->A->data;
1435     bnz = (PetscBLASInt)xa->nz;
1436     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1437     xb = (Mat_SeqBAIJ *)xx->B->data;
1438     yb = (Mat_SeqBAIJ *)yy->B->data;
1439     bnz = (PetscBLASInt)xb->nz;
1440     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1441   } else {
1442     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1443     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1444     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1445   }
1446   PetscFunctionReturn(0);
1447 }
1448 
1449 #undef __FUNCT__
1450 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1451 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1452 {
1453   PetscErrorCode ierr;
1454   PetscInt       i;
1455   PetscTruth     flg;
1456 
1457   PetscFunctionBegin;
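  /* only the upper triangle is stored, so only symmetric submatrices (irow[i] == icol[i]) can be extracted */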
1458   for (i=0; i<n; i++) {
1459     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1460     if (!flg) {
1461       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1462     }
1463   }
1464   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1465   PetscFunctionReturn(0);
1466 }
1467 
1468 
1469 /* -------------------------------------------------------------------*/
1470 static struct _MatOps MatOps_Values = {
1471        MatSetValues_MPISBAIJ,
1472        MatGetRow_MPISBAIJ,
1473        MatRestoreRow_MPISBAIJ,
1474        MatMult_MPISBAIJ,
1475 /* 4*/ MatMultAdd_MPISBAIJ,
1476        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1477        MatMultAdd_MPISBAIJ,
1478        0,
1479        0,
1480        0,
1481 /*10*/ 0,
1482        0,
1483        0,
1484        MatRelax_MPISBAIJ,
1485        MatTranspose_MPISBAIJ,
1486 /*15*/ MatGetInfo_MPISBAIJ,
1487        MatEqual_MPISBAIJ,
1488        MatGetDiagonal_MPISBAIJ,
1489        MatDiagonalScale_MPISBAIJ,
1490        MatNorm_MPISBAIJ,
1491 /*20*/ MatAssemblyBegin_MPISBAIJ,
1492        MatAssemblyEnd_MPISBAIJ,
1493        0,
1494        MatSetOption_MPISBAIJ,
1495        MatZeroEntries_MPISBAIJ,
1496 /*25*/ 0,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*35*/ MatDuplicate_MPISBAIJ,
1507        0,
1508        0,
1509        0,
1510        0,
1511 /*40*/ MatAXPY_MPISBAIJ,
1512        MatGetSubMatrices_MPISBAIJ,
1513        MatIncreaseOverlap_MPISBAIJ,
1514        MatGetValues_MPISBAIJ,
1515        MatCopy_MPISBAIJ,
1516 /*45*/ 0,
1517        MatScale_MPISBAIJ,
1518        0,
1519        0,
1520        0,
1521 /*50*/ 0,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*55*/ 0,
1527        0,
1528        MatSetUnfactored_MPISBAIJ,
1529        0,
1530        MatSetValuesBlocked_MPISBAIJ,
1531 /*60*/ 0,
1532        0,
1533        0,
1534        0,
1535        0,
1536 /*65*/ 0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*70*/ MatGetRowMaxAbs_MPISBAIJ,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*75*/ 0,
1547        0,
1548        0,
1549        0,
1550        0,
1551 /*80*/ 0,
1552        0,
1553        0,
1554        0,
1555        MatLoad_MPISBAIJ,
1556 /*85*/ 0,
1557        0,
1558        0,
1559        0,
1560        0,
1561 /*90*/ 0,
1562        0,
1563        0,
1564        0,
1565        0,
1566 /*95*/ 0,
1567        0,
1568        0,
1569        0,
1570        0,
1571 /*100*/0,
1572        0,
1573        0,
1574        0,
1575        0,
1576 /*105*/0,
1577        MatRealPart_MPISBAIJ,
1578        MatImaginaryPart_MPISBAIJ,
1579        MatGetRowUpperTriangular_MPISBAIJ,
1580        MatRestoreRowUpperTriangular_MPISBAIJ
1581 };
1582 
1583 
1584 EXTERN_C_BEGIN
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1587 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1588 {
1589   PetscFunctionBegin;
1590   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1591   *iscopy = PETSC_FALSE;
1592   PetscFunctionReturn(0);
1593 }
1594 EXTERN_C_END
1595 
1596 EXTERN_C_BEGIN
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1599 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1600 {
1601   Mat_MPISBAIJ   *b;
1602   PetscErrorCode ierr;
1603   PetscInt       i,mbs,Mbs;
1604 
1605   PetscFunctionBegin;
1606   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1607     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
1608   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1609 
1610   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1611   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1612   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1613   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1614   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1615 
1616   B->rmap.bs = B->cmap.bs = bs;
1617   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1618   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1619 
1620   if (d_nnz) {
1621     for (i=0; i<B->rmap.n/bs; i++) {
1622       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1623     }
1624   }
1625   if (o_nnz) {
1626     for (i=0; i<B->rmap.n/bs; i++) {
1627       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1628     }
1629   }
1630   B->preallocated = PETSC_TRUE;
1631 
1632   b   = (Mat_MPISBAIJ*)B->data;
1633   mbs = B->rmap.n/bs;
1634   Mbs = B->rmap.N/bs;
1635   if (mbs*bs != B->rmap.n) {
1636     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
1637   }
1638 
1639   B->rmap.bs  = bs;
1640   b->bs2 = bs*bs;
1641   b->mbs = mbs;
1642   b->nbs = mbs;
1643   b->Mbs = Mbs;
1644   b->Nbs = Mbs;
1645 
1646   for (i=0; i<=b->size; i++) {
1647     b->rangebs[i] = B->rmap.range[i]/bs;
1648   }
1649   b->rstartbs = B->rmap.rstart/bs;
1650   b->rendbs   = B->rmap.rend/bs;
1651 
1652   b->cstartbs = B->cmap.rstart/bs;
1653   b->cendbs   = B->cmap.rend/bs;
1654 
1655   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1656   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1657   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1658   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1659   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1660 
1661   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1662   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1663   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1664   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1665   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1666 
1667   /* build cache (stash) for off-process entries generated during assembly */
1668   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1669 
1670   PetscFunctionReturn(0);
1671 }
1672 EXTERN_C_END
1673 
1674 /*MC
1675    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1676    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1677 
1678    Options Database Keys:
1679 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1680 
1681   Level: beginner
1682 
1683 .seealso: MatCreateMPISBAIJ
1684 M*/
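
/*
   Illustrative usage sketch (not taken from this file; the communicator, global size,
   block size and preallocation counts below are assumptions chosen for the example):

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,128,128);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   Only entries in the upper triangle (including the diagonal) should then be inserted
   with MatSetValues()/MatSetValuesBlocked().
*/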
1685 
1686 EXTERN_C_BEGIN
1687 #undef __FUNCT__
1688 #define __FUNCT__ "MatCreate_MPISBAIJ"
1689 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1690 {
1691   Mat_MPISBAIJ   *b;
1692   PetscErrorCode ierr;
1693   PetscTruth     flg;
1694 
1695   PetscFunctionBegin;
1696 
1697   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1698   B->data = (void*)b;
1699   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1700 
1701   B->ops->destroy    = MatDestroy_MPISBAIJ;
1702   B->ops->view       = MatView_MPISBAIJ;
1703   B->mapping    = 0;
1704   B->factor     = 0;
1705   B->assembled  = PETSC_FALSE;
1706 
1707   B->insertmode = NOT_SET_VALUES;
1708   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1709   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1710 
1711   /* build local table of row and column ownerships */
1712   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1713 
1714   /* build cache (stash) for off-process entries generated during assembly */
1715   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1716   b->donotstash  = PETSC_FALSE;
1717   b->colmap      = PETSC_NULL;
1718   b->garray      = PETSC_NULL;
1719   b->roworiented = PETSC_TRUE;
1720 
1721 #if defined(PETSC_USE_MAT_SINGLE)
1722   /* stuff for MatSetValues_XXX in single precision */
1723   b->setvalueslen     = 0;
1724   b->setvaluescopy    = PETSC_NULL;
1725 #endif
1726 
1727   /* stuff used in block assembly */
1728   b->barray       = 0;
1729 
1730   /* stuff used for matrix vector multiply */
1731   b->lvec         = 0;
1732   b->Mvctx        = 0;
1733   b->slvec0       = 0;
1734   b->slvec0b      = 0;
1735   b->slvec1       = 0;
1736   b->slvec1a      = 0;
1737   b->slvec1b      = 0;
1738   b->sMvctx       = 0;
1739 
1740   /* stuff for MatGetRow() */
1741   b->rowindices   = 0;
1742   b->rowvalues    = 0;
1743   b->getrowactive = PETSC_FALSE;
1744 
1745   /* hash table stuff */
1746   b->ht           = 0;
1747   b->hd           = 0;
1748   b->ht_size      = 0;
1749   b->ht_flag      = PETSC_FALSE;
1750   b->ht_fact      = 0;
1751   b->ht_total_ct  = 0;
1752   b->ht_insert_ct = 0;
1753 
1754   b->in_loc       = 0;
1755   b->v_loc        = 0;
1756   b->n_loc        = 0;
1757   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1758     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1759     if (flg) {
1760       PetscReal fact = 1.39;
1761       ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1762       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1763       if (fact <= 1.0) fact = 1.39;
1764       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1765       ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1766     }
1767   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1768 
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1770                                      "MatStoreValues_MPISBAIJ",
1771                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1773                                      "MatRetrieveValues_MPISBAIJ",
1774                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1776                                      "MatGetDiagonalBlock_MPISBAIJ",
1777                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1779                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1780                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1781   B->symmetric                  = PETSC_TRUE;
1782   B->structurally_symmetric     = PETSC_TRUE;
1783   B->symmetric_set              = PETSC_TRUE;
1784   B->structurally_symmetric_set = PETSC_TRUE;
1785   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1786   PetscFunctionReturn(0);
1787 }
1788 EXTERN_C_END
1789 
1790 /*MC
1791    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1792 
1793    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1794    and MATMPISBAIJ otherwise.
1795 
1796    Options Database Keys:
1797 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1798 
1799   Level: beginner
1800 
1801 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1802 M*/
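
/*
   Illustrative sketch (assumed driver code, not part of this file): with MatSetFromOptions()
   the type can be selected at run time, e.g. with -mat_type sbaij, which resolves to
   MATSEQSBAIJ on one process and MATMPISBAIJ otherwise (see MatCreate_SBAIJ() below).

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,128,128);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);
*/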
1803 
1804 EXTERN_C_BEGIN
1805 #undef __FUNCT__
1806 #define __FUNCT__ "MatCreate_SBAIJ"
1807 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1808 {
1809   PetscErrorCode ierr;
1810   PetscMPIInt    size;
1811 
1812   PetscFunctionBegin;
1813   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1814   if (size == 1) {
1815     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1816   } else {
1817     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1818   }
1819   PetscFunctionReturn(0);
1820 }
1821 EXTERN_C_END
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1825 /*@C
1826    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1827    the user should preallocate the matrix storage by setting the parameters
1828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1829    performance can be increased by more than a factor of 50.
1830 
1831    Collective on Mat
1832 
1833    Input Parameters:
1834 +  A - the matrix
1835 .  bs   - size of block
1836 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1837            submatrix  (same for all local rows)
1838 .  d_nnz - array containing the number of block nonzeros in the various block rows
1839            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1840            (possibly different for each block row) or PETSC_NULL.  You must leave room
1841            for the diagonal entry even if it is zero.
1842 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1843            submatrix (same for all local rows).
1844 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1845            off-diagonal portion of the local submatrix (possibly different for
1846            each block row) or PETSC_NULL.
1847 
1848 
1849    Options Database Keys:
1850 +   -mat_no_unroll - uses code that does not unroll the loops in the
1851                      block calculations (much slower)
1852 -   -mat_block_size - size of the blocks to use
1853 
1854    Notes:
1855 
1856    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1857    then it must be used on all processors that share the object for that argument.
1858 
1859    If the *_nnz parameter is given then the *_nz parameter is ignored
1860 
1861    Storage Information:
1862    For a square global matrix we define each processor's diagonal portion
1863    to be its local rows and the corresponding columns (a square submatrix);
1864    each processor's off-diagonal portion encompasses the remainder of the
1865    local matrix (a rectangular submatrix).
1866 
1867    The user can specify preallocated storage for the diagonal part of
1868    the local submatrix with either d_nz or d_nnz (not both).  Set
1869    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1870    memory allocation.  Likewise, specify preallocated storage for the
1871    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1872 
1873    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1874    the figure below we depict these three local rows and all columns (0-11).
1875 
1876 .vb
1877            0 1 2 3 4 5 6 7 8 9 10 11
1878           -------------------
1879    row 3  |  o o o d d d o o o o o o
1880    row 4  |  o o o d d d o o o o o o
1881    row 5  |  o o o d d d o o o o o o
1882           -------------------
1883 .ve
1884 
1885    Thus, any entries in the d locations are stored in the d (diagonal)
1886    submatrix, and any entries in the o locations are stored in the
1887    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1888    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1889 
1890    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1891    plus the diagonal part of the d matrix,
1892    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1893    In general, for PDE problems in which most nonzeros are near the diagonal,
1894    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1895    or you will get TERRIBLE performance; see the users' manual chapter on
1896    matrices.
1897 
1898    Level: intermediate
1899 
1900 .keywords: matrix, block, aij, compressed row, sparse, parallel
1901 
1902 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1903 @*/
1904 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1905 {
1906   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1907 
1908   PetscFunctionBegin;
1909   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1910   if (f) {
1911     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1912   }
1913   PetscFunctionReturn(0);
1914 }
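
/*
   Illustrative sketch of the two preallocation styles documented above (the matrix A,
   block size bs and the counts are assumptions chosen for the example, not taken from
   this file).  Constant per-block-row counts:

     ierr = MatMPISBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);

   or per-block-row arrays d_nnz/o_nnz of length mbs = (local rows)/bs, in which case
   the d_nz/o_nz arguments are ignored:

     ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/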
1915 
1916 #undef __FUNCT__
1917 #define __FUNCT__ "MatCreateMPISBAIJ"
1918 /*@C
1919    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1920    (block compressed row).  For good matrix assembly performance
1921    the user should preallocate the matrix storage by setting the parameters
1922    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1923    performance can be increased by more than a factor of 50.
1924 
1925    Collective on MPI_Comm
1926 
1927    Input Parameters:
1928 +  comm - MPI communicator
1929 .  bs   - size of block
1930 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1931            This value should be the same as the local size used in creating the
1932            y vector for the matrix-vector product y = Ax.
1933 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1934            This value should be the same as the local size used in creating the
1935            x vector for the matrix-vector product y = Ax.
1936 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1937 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1938 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1939            submatrix  (same for all local rows)
1940 .  d_nnz - array containing the number of block nonzeros in the various block rows
1941            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1942            (possibly different for each block row) or PETSC_NULL.
1943            You must leave room for the diagonal entry even if it is zero.
1944 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1945            submatrix (same for all local rows).
1946 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1947            off-diagonal portion of the local submatrix (possibly different for
1948            each block row) or PETSC_NULL.
1949 
1950    Output Parameter:
1951 .  A - the matrix
1952 
1953    Options Database Keys:
1954 +   -mat_no_unroll - uses code that does not unroll the loops in the
1955                      block calculations (much slower)
1956 .   -mat_block_size - size of the blocks to use
1957 -   -mat_mpi - use the parallel matrix data structures even on one processor
1958                (defaults to using SeqSBAIJ format on one processor)
1959 
1960    Notes:
1961    The number of rows and columns must be divisible by blocksize.
1962 
1963    The user MUST specify either the local or global matrix dimensions
1964    (possibly both).
1965 
1966    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1967    then it must be used on all processors that share the object for that argument.
1968 
1969    If the *_nnz parameter is given then the *_nz parameter is ignored
1970 
1971    Storage Information:
1972    For a square global matrix we define each processor's diagonal portion
1973    to be its local rows and the corresponding columns (a square submatrix);
1974    each processor's off-diagonal portion encompasses the remainder of the
1975    local matrix (a rectangular submatrix).
1976 
1977    The user can specify preallocated storage for the diagonal part of
1978    the local submatrix with either d_nz or d_nnz (not both).  Set
1979    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1980    memory allocation.  Likewise, specify preallocated storage for the
1981    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1982 
1983    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1984    the figure below we depict these three local rows and all columns (0-11).
1985 
1986 .vb
1987            0 1 2 3 4 5 6 7 8 9 10 11
1988           -------------------
1989    row 3  |  o o o d d d o o o o o o
1990    row 4  |  o o o d d d o o o o o o
1991    row 5  |  o o o d d d o o o o o o
1992           -------------------
1993 .ve
1994 
1995    Thus, any entries in the d locations are stored in the d (diagonal)
1996    submatrix, and any entries in the o locations are stored in the
1997    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1998    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1999 
2000    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2001    plus the diagonal part of the d matrix,
2002    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2003    In general, for PDE problems in which most nonzeros are near the diagonal,
2004    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2005    or you will get TERRIBLE performance; see the users' manual chapter on
2006    matrices.
2007 
2008    Level: intermediate
2009 
2010 .keywords: matrix, block, aij, compressed row, sparse, parallel
2011 
2012 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2013 @*/
2014 
2015 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2016 {
2017   PetscErrorCode ierr;
2018   PetscMPIInt    size;
2019 
2020   PetscFunctionBegin;
2021   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2022   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2023   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2024   if (size > 1) {
2025     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2026     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2027   } else {
2028     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2029     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2030   }
2031   PetscFunctionReturn(0);
2032 }
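
/*
   Illustrative call (communicator, block size, global size and preallocation counts
   are assumptions chosen for the example, not taken from this file):

     Mat            A;
     PetscErrorCode ierr;
     ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,128,128,
                              5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
*/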
2033 
2034 
2035 #undef __FUNCT__
2036 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2037 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2038 {
2039   Mat            mat;
2040   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2041   PetscErrorCode ierr;
2042   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2043   PetscScalar    *array;
2044 
2045   PetscFunctionBegin;
2046   *newmat       = 0;
2047   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2048   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2049   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2050   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2051   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2052   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2053 
2054   mat->factor       = matin->factor;
2055   mat->preallocated = PETSC_TRUE;
2056   mat->assembled    = PETSC_TRUE;
2057   mat->insertmode   = NOT_SET_VALUES;
2058 
2059   a = (Mat_MPISBAIJ*)mat->data;
2060   a->bs2   = oldmat->bs2;
2061   a->mbs   = oldmat->mbs;
2062   a->nbs   = oldmat->nbs;
2063   a->Mbs   = oldmat->Mbs;
2064   a->Nbs   = oldmat->Nbs;
2065 
2066 
2067   a->size         = oldmat->size;
2068   a->rank         = oldmat->rank;
2069   a->donotstash   = oldmat->donotstash;
2070   a->roworiented  = oldmat->roworiented;
2071   a->rowindices   = 0;
2072   a->rowvalues    = 0;
2073   a->getrowactive = PETSC_FALSE;
2074   a->barray       = 0;
2075   a->rstartbs    = oldmat->rstartbs;
2076   a->rendbs      = oldmat->rendbs;
2077   a->cstartbs    = oldmat->cstartbs;
2078   a->cendbs      = oldmat->cendbs;
2079 
2080   /* hash table stuff */
2081   a->ht           = 0;
2082   a->hd           = 0;
2083   a->ht_size      = 0;
2084   a->ht_flag      = oldmat->ht_flag;
2085   a->ht_fact      = oldmat->ht_fact;
2086   a->ht_total_ct  = 0;
2087   a->ht_insert_ct = 0;
2088 
2089   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2090   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2091   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2092   if (oldmat->colmap) {
2093 #if defined (PETSC_USE_CTABLE)
2094     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2095 #else
2096     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2097     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2098     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2099 #endif
2100   } else a->colmap = 0;
2101 
2102   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2103     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2104     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2105     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2106   } else a->garray = 0;
2107 
2108   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2109   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2110   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2111   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2112 
2113   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2114   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2115   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2116   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2117 
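  /* slvec1a, slvec1b and slvec0b are sequential vectors that share storage with slvec1 and slvec0:
     the first bs*mbs entries hold the local part, the remainder the communicated part */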
2118   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2119   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2120   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2121   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2122   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2123   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2124   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2125   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2126   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2127   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2128   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2129   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2130   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2131 
2132   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so instead take an extra reference to the old scatter: */
2133   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2134   a->sMvctx = oldmat->sMvctx;
2135   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2136 
2137   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2139   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2140   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2141   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2142   *newmat = mat;
2143   PetscFunctionReturn(0);
2144 }
2145 
2146 #include "petscsys.h"
2147 
2148 #undef __FUNCT__
2149 #define __FUNCT__ "MatLoad_MPISBAIJ"
2150 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2151 {
2152   Mat            A;
2153   PetscErrorCode ierr;
2154   PetscInt       i,nz,j,rstart,rend;
2155   PetscScalar    *vals,*buf;
2156   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2157   MPI_Status     status;
2158   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2159   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2160   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2161   PetscInt       bs=1,Mbs,mbs,extra_rows;
2162   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2163   PetscInt       dcount,kmax,k,nzcount,tmp;
2164   int            fd;
2165 
2166   PetscFunctionBegin;
2167   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2168     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2169   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2170 
2171   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2172   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
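  /* process 0 reads the binary header (cookie, M, N, nz); a negative nonzero count flags a special storage format that cannot be loaded here */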
2173   if (!rank) {
2174     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2175     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2176     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2177     if (header[3] < 0) {
2178       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2179     }
2180   }
2181 
2182   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2183   M = header[1]; N = header[2];
2184 
2185   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2186 
2187   /*
2188      This code adds extra rows to make sure the number of rows is
2189      divisible by the blocksize
2190   */
2191   Mbs        = M/bs;
2192   extra_rows = bs - M + bs*(Mbs);
2193   if (extra_rows == bs) extra_rows = 0;
2194   else                  Mbs++;
2195   if (extra_rows && !rank) {
2196     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2197   }
2198 
2199   /* determine ownership of all rows */
2200   mbs        = Mbs/size + ((Mbs % size) > rank);
2201   m          = mbs*bs;
2202   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2203   browners   = rowners + size + 1;
2204   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2205   rowners[0] = 0;
2206   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2207   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2208   rstart = rowners[rank];
2209   rend   = rowners[rank+1];
2210 
2211   /* distribute row lengths to all processors */
2212   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2213   if (!rank) {
2214     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2215     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2216     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2217     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2218     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2219     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2220     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2221   } else {
2222     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2223   }
2224 
2225   if (!rank) {   /* procs[0] */
2226     /* calculate the number of nonzeros on each processor */
2227     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2228     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2229     for (i=0; i<size; i++) {
2230       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2231         procsnz[i] += rowlengths[j];
2232       }
2233     }
2234     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2235 
2236     /* determine max buffer needed and allocate it */
2237     maxnz = 0;
2238     for (i=0; i<size; i++) {
2239       maxnz = PetscMax(maxnz,procsnz[i]);
2240     }
2241     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2242 
2243     /* read in my part of the matrix column indices  */
2244     nz     = procsnz[0];
2245     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2246     mycols = ibuf;
2247     if (size == 1)  nz -= extra_rows;
2248     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2249     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2250 
2251     /* read in every other processor's part (except the last) and ship it off */
2252     for (i=1; i<size-1; i++) {
2253       nz   = procsnz[i];
2254       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2255       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2256     }
2257     /* read in the stuff for the last proc */
2258     if (size != 1) {
2259       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2260       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2261       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2262       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2263     }
2264     ierr = PetscFree(cols);CHKERRQ(ierr);
2265   } else {  /* procs[i], i>0 */
2266     /* determine buffer space needed for message */
2267     nz = 0;
2268     for (i=0; i<m; i++) {
2269       nz += locrowlens[i];
2270     }
2271     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2272     mycols = ibuf;
2273     /* receive message of column indices*/
2274     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2275     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2276     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2277   }
2278 
2279   /* loop over local rows, determining number of off diagonal entries */
2280   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2281   odlens   = dlens + (rend-rstart);
2282   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2283   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2284   masked1  = mask    + Mbs;
2285   masked2  = masked1 + Mbs;
2286   rowcount = 0; nzcount = 0;
2287   for (i=0; i<mbs; i++) {
2288     dcount  = 0;
2289     odcount = 0;
2290     for (j=0; j<bs; j++) {
2291       kmax = locrowlens[rowcount];
2292       for (k=0; k<kmax; k++) {
2293         tmp = mycols[nzcount++]/bs; /* block col. index */
2294         if (!mask[tmp]) {
2295           mask[tmp] = 1;
2296           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2297           else masked1[dcount++] = tmp; /* entry in diag portion */
2298         }
2299       }
2300       rowcount++;
2301     }
2302 
2303     dlens[i]  = dcount;  /* d_nnz[i] */
2304     odlens[i] = odcount; /* o_nnz[i] */
2305 
2306     /* zero out the mask elements we set */
2307     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2308     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2309   }
2310 
2311   /* create our matrix */
2312   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2313   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2314   ierr = MatSetType(A,type);CHKERRQ(ierr);
2315   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2316   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2317 
2318   if (!rank) {
2319     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2320     /* read in my part of the matrix numerical values  */
2321     nz = procsnz[0];
2322     vals = buf;
2323     mycols = ibuf;
2324     if (size == 1)  nz -= extra_rows;
2325     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2326     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2327 
2328     /* insert into matrix */
2329     jj      = rstart*bs;
2330     for (i=0; i<m; i++) {
2331       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2332       mycols += locrowlens[i];
2333       vals   += locrowlens[i];
2334       jj++;
2335     }
2336 
2337     /* read in the other processors' parts (except the last one) and ship them out */
2338     for (i=1; i<size-1; i++) {
2339       nz   = procsnz[i];
2340       vals = buf;
2341       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2342       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2343     }
2344     /* the last proc */
2345     if (size != 1){
2346       nz   = procsnz[i] - extra_rows;
2347       vals = buf;
2348       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2349       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2350       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2351     }
2352     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2353 
2354   } else {
2355     /* receive numeric values */
2356     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2357 
2358     /* receive message of values*/
2359     vals   = buf;
2360     mycols = ibuf;
2361     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2362     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2363     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2364 
2365     /* insert into matrix */
2366     jj      = rstart*bs;
2367     for (i=0; i<m; i++) {
2368       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2369       mycols += locrowlens[i];
2370       vals   += locrowlens[i];
2371       jj++;
2372     }
2373   }
2374 
2375   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2376   ierr = PetscFree(buf);CHKERRQ(ierr);
2377   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2378   ierr = PetscFree(rowners);CHKERRQ(ierr);
2379   ierr = PetscFree(dlens);CHKERRQ(ierr);
2380   ierr = PetscFree(mask);CHKERRQ(ierr);
2381   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2382   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2383   *newmat = A;
2384   PetscFunctionReturn(0);
2385 }
2386 
2387 #undef __FUNCT__
2388 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2389 /*XXXXX@
2390    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2391 
2392    Input Parameters:
2393 .  mat  - the matrix
2394 .  fact - factor
2395 
2396    Collective on Mat
2397 
2398    Level: advanced
2399 
2400   Notes:
2401    This can also be set by the command line option: -mat_use_hash_table fact
2402 
2403 .keywords: matrix, hashtable, factor, HT
2404 
2405 .seealso: MatSetOption()
2406 @XXXXX*/
2407 
2408 
2409 #undef __FUNCT__
2410 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2411 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2412 {
2413   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2414   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2415   PetscReal      atmp;
2416   PetscReal      *work,*svalues,*rvalues;
2417   PetscErrorCode ierr;
2418   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2419   PetscMPIInt    rank,size;
2420   PetscInt       *rowners_bs,dest,count,source;
2421   PetscScalar    *va;
2422   MatScalar      *ba;
2423   MPI_Status     stat;
2424 
2425   PetscFunctionBegin;
2426   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2427   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2428   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2429 
2430   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2431   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2432 
2433   bs   = A->rmap.bs;
2434   mbs  = a->mbs;
2435   Mbs  = a->Mbs;
2436   ba   = b->a;
2437   bi   = b->i;
2438   bj   = b->j;
2439 
2440   /* find ownerships */
2441   rowners_bs = A->rmap.range;
2442 
2443   /* each proc creates an array to be distributed */
2444   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2445   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2446 
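  /* only the upper triangle is stored, so entries of the off-diagonal block B also provide the (transposed)
     contributions for rows owned by later processes; accumulate those in work[] and send each slice to its owner below */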
2447   /* row_max for B */
2448   if (rank != size-1){
2449     for (i=0; i<mbs; i++) {
2450       ncols = bi[1] - bi[0]; bi++;
2451       brow  = bs*i;
2452       for (j=0; j<ncols; j++){
2453         bcol = bs*(*bj);
2454         for (kcol=0; kcol<bs; kcol++){
2455           col = bcol + kcol;                 /* local col index */
2456           col += rowners_bs[rank+1];      /* global col index */
2457           for (krow=0; krow<bs; krow++){
2458             atmp = PetscAbsScalar(*ba); ba++;
2459             row = brow + krow;    /* local row index */
2460             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2461             if (work[col] < atmp) work[col] = atmp;
2462           }
2463         }
2464         bj++;
2465       }
2466     }
2467 
2468     /* send values to its owners */
2469     for (dest=rank+1; dest<size; dest++){
2470       svalues = work + rowners_bs[dest];
2471       count   = rowners_bs[dest+1]-rowners_bs[dest];
2472       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2473     }
2474   }
2475 
2476   /* receive values */
2477   if (rank){
2478     rvalues = work;
2479     count   = rowners_bs[rank+1]-rowners_bs[rank];
2480     for (source=0; source<rank; source++){
2481       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2482       /* process values */
2483       for (i=0; i<count; i++){
2484         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2485       }
2486     }
2487   }
2488 
2489   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2490   ierr = PetscFree(work);CHKERRQ(ierr);
2491   PetscFunctionReturn(0);
2492 }
2493 
2494 #undef __FUNCT__
2495 #define __FUNCT__ "MatRelax_MPISBAIJ"
2496 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2497 {
2498   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2499   PetscErrorCode ierr;
2500   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2501   PetscScalar    *x,*b,*ptr,zero=0.0;
2502   Vec            bb1;
2503 
2504   PetscFunctionBegin;
2505   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2506   if (bs > 1)
2507     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2508 
2509   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2510     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2511       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2512       its--;
2513     }
2514 
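    /* each sweep: scatter slvec0 = [-xx ; -B^T*xx] into slvec1 = [bb ; 0] with ADD_VALUES, so that
       bb1 = slvec1a + B*slvec1b equals bb minus the off-process coupling, then relax the diagonal
       block locally with a symmetric sweep */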
2515     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2516     while (its--){
2517 
2518       /* lower triangular part: slvec0b = - B^T*xx */
2519       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2520 
2521       /* copy xx into slvec0a */
2522       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2523       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2524       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2525       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2526 
2527       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2528 
2529       /* copy bb into slvec1a */
2530       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2531       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2532       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2533       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2534 
2535       /* set slvec1b = 0 */
2536       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2537 
2538       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2539       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2540       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2541       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2542 
2543       /* upper triangular part: bb1 = slvec1a + B*slvec1b */
2544       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2545 
2546       /* local diagonal sweep */
2547       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2548     }
2549     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2550   } else {
2551     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2552   }
2553   PetscFunctionReturn(0);
2554 }
2555 
2556 #undef __FUNCT__
2557 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2558 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2559 {
2560   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2561   PetscErrorCode ierr;
2562   Vec            lvec1,bb1;
2563 
2564   PetscFunctionBegin;
2565   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2566   if (matin->rmap.bs > 1)
2567     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2568 
2569   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2570     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2571       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2572       its--;
2573     }
2574 
2575     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2576     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2577     while (its--){
2578       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2579 
2580       /* lower diagonal part: bb1 = bb - B^T*xx */
2581       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2582       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2583 
2584       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2585       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2586       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2587 
2588       /* upper diagonal part: bb1 = bb1 - B*x */
2589       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2590       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2591 
2592       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2593 
2594       /* diagonal sweep */
2595       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2596     }
2597     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2598     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2599   } else {
2600     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2601   }
2602   PetscFunctionReturn(0);
2603 }
2604 
2605