#define PETSCMAT_DLL

#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat,Vec,PetscInt[]);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements already in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to put them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
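
/*
   Usage sketch (illustrative only, excluded from the build): MatStoreValues()/
   MatRetrieveValues() let an application save and restore the numerical values
   of a matrix whose nonzero pattern is reused, e.g. a preconditioner matrix
   refilled every time step.  This assumes the PetscTruth-based MatSetOption()
   of this PETSc version; the function name is hypothetical.
*/
#if 0
static PetscErrorCode RefillSketch(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(A,MAT_NEW_NONZERO_LOCATIONS,PETSC_FALSE);CHKERRQ(ierr);
  ierr = MatStoreValues(A);CHKERRQ(ierr);     /* save the current values */
  /* ... overwrite values, factor, solve ... */
  ierr = MatRetrieveValues(A);CHKERRQ(ierr);  /* restore the saved values */
  return 0;
}
#endif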

#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
#ifndef MatSetValues_SeqBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif

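/*
   The insertion macros above locate a block column inside a sorted row with a
   truncated binary search: the search stops once the window shrinks to a few
   entries and a linear scan finishes the job, which is cheaper for the short
   rows typical of sparse matrices.  A minimal standalone sketch of that
   pattern (illustrative only, excluded from the build):
*/
#if 0
static PetscInt FindBlockColumnSketch(const PetscInt rp[],PetscInt nrow,PetscInt bcol)
{
  PetscInt low = 0,high = nrow,t,i;

  while (high-low > 3) {            /* truncated binary search */
    t = (low+high)/2;
    if (rp[t] > bcol) high = t;
    else              low  = t;
  }
  for (i=low; i<high; i++) {        /* linear scan over the remaining window */
    if (rp[i] == bcol) return i;    /* found: index of the block */
    if (rp[i] > bcol)  break;       /* passed it: block not present */
  }
  return -1;                        /* caller must insert a new block */
}
#endif
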
#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=mat->rmap.rstart;
  PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
  PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;

  /* Some Variables required in the macro */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar     *ba=b->a;

  PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar     *ap,*bap;

  /* for stash */
  PetscInt      n_loc, *in_loc = PETSC_NULL;
  MatScalar     *v_loc = PETSC_NULL;

  PetscFunctionBegin;

  if (!baij->donotstash){
    if (n > baij->n_loc) {
      ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
      ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
      ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
      baij->n_loc = n;
    }
    in_loc = baij->in_loc;
    v_loc  = baij->v_loc;
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs){
          if (a->ignore_ltriangular){
            continue;    /* ignore lower triangular blocks */
          } else {
            SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE)");
          }
        }
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }
  PetscFunctionReturn(0);
}

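/*
   Usage sketch (illustrative only, excluded from the build): for MATMPISBAIJ
   only the upper triangle is stored, so entries are set with global row index
   <= column index; lower-triangular input is rejected unless
   MAT_IGNORE_LOWER_TRIANGULAR is set.  The tridiagonal values and function
   name below are made up.
*/
#if 0
static PetscErrorCode SetUpperTriangleSketch(Mat A,PetscInt rstart,PetscInt rend,PetscInt N)
{
  PetscErrorCode ierr;
  PetscInt       i,ncols,col[2];
  PetscScalar    val[2];

  for (i=rstart; i<rend; i++) {
    col[0] = i;   val[0] = 2.0;    /* diagonal */
    col[1] = i+1; val[1] = -1.0;   /* first superdiagonal */
    ncols  = (i+1 < N) ? 2 : 1;    /* last global row has no superdiagonal */
    ierr = MatSetValues(A,1,&i,ncols,col,val,INSERT_VALUES);CHKERRQ(ierr);
  }
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif
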
#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
  PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
  PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if(!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is only one block column (row oriented) or one block row (column oriented), no copy is required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}

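/*
   Usage sketch (illustrative only, excluded from the build): with
   MatSetValuesBlocked() the indices are *block* indices and v holds bs*bs
   values per block.  For bs = 2, inserting the diagonal block of block row i
   (the function name and values are made up):
*/
#if 0
static PetscErrorCode SetDiagonalBlockSketch(Mat A,PetscInt i)
{
  PetscErrorCode ierr;
  PetscScalar    block[4] = { 4.0,-1.0,
                             -1.0, 4.0};  /* one 2x2 block, row oriented */

  ierr = MatSetValuesBlocked(A,1,&i,1,&i,block,INSERT_VALUES);CHKERRQ(ierr);
  return 0;
}
#endif
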
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
  PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]); */
    if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) continue; /* SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]); */
        if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
      Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
      Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
      PetscReal    *rsum,*rsum2,vabs;
      PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
      PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
      MatScalar    *v;

      ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
      rsum2 = rsum + mat->cmap.N;
      ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
      /* Amat */
      v = amat->a; jj = amat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = amat->i[brow+1] - amat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*(rstart + *jj); jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              /* non-diagonal block */
              if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
            }
          }
        }
      }
      /* Bmat */
      v = bmat->a; jj = bmat->j;
      for (brow=0; brow<mbs; brow++) {
        grow = bs*(rstart + brow);
        nz = bmat->i[brow+1] - bmat->i[brow];
        for (bcol=0; bcol<nz; bcol++){
          gcol = bs*garray[*jj]; jj++;
          for (col=0; col<bs; col++){
            for (row=0; row<bs; row++){
              vabs = PetscAbsScalar(*v); v++;
              rsum[gcol+col] += vabs;
              rsum[grow+row] += vabs;
            }
          }
        }
      }
      ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,((PetscObject)mat)->comm);CHKERRQ(ierr);
      *norm = 0.0;
      for (col=0; col<mat->cmap.N; col++) {
        if (rsum2[col] > *norm) *norm = rsum2[col];
      }
      ierr = PetscFree(rsum);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}

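/*
   Why *norm = sqrt(sum[0] + 2*sum[1]) above: each process stores its symmetric
   diagonal block A (a SeqSBAIJ whose own MatNorm() already accounts for the
   symmetric storage) and the strictly off-diagonal block B.  Every entry of B
   represents two entries of the full symmetric matrix, one above and one
   below the diagonal, hence ||M||_F^2 = sum_p (||A_p||_F^2 + 2 ||B_p||_F^2).
   A usage sketch (illustrative only, excluded from the build; function name
   made up):
*/
#if 0
static PetscErrorCode NormSketch(Mat M)
{
  PetscErrorCode ierr;
  PetscReal      fnorm,inorm;

  ierr = MatNorm(M,NORM_FROBENIUS,&fnorm);CHKERRQ(ierr);
  ierr = MatNorm(M,NORM_INFINITY,&inorm);CHKERRQ(ierr); /* equals NORM_1 for symmetric M */
  ierr = PetscPrintf(PETSC_COMM_WORLD,"||M||_F = %g  ||M||_inf = %g\n",(double)fnorm,(double)inorm);CHKERRQ(ierr);
  return 0;
}
#endif
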
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,((PetscObject)mat)->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(mat,&mat->stash,mat->rmap.range);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(mat,&mat->bstash,baij->rangebs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  ierr = PetscInfo2(mat,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,((PetscObject)mat)->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  baij->rowvalues = 0;

  PetscFunctionReturn(0);
}

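/*
   The two routines above implement the stash protocol: MatAssemblyBegin()
   launches the sends of stashed off-process entries, and MatAssemblyEnd()
   receives them and inserts them via MatSetValues_MPISBAIJ_MatScalar().
   A sketch of how an application relies on this (illustrative only, excluded
   from the build; function name made up):
*/
#if 0
static PetscErrorCode OffProcessSketch(Mat A,PetscInt grow,PetscInt gcol,PetscScalar v)
{
  PetscErrorCode ierr;

  /* grow/gcol may belong to another process; the value is stashed locally */
  ierr = MatSetValues(A,1,&grow,1,&gcol,&v,ADD_VALUES);CHKERRQ(ierr);
  /* the collective assembly pair routes every stashed value to its owner */
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  return 0;
}
#endif
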
extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->rmap.bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(((PetscObject)mat)->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = PetscViewerASCIIPrintf(viewer,"Information on VecScatter used in matrix-vector product: \n");CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,((PetscObject)mat)->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    ierr = MatCreate(((PetscObject)mat)->comm,&A);CHKERRQ(ierr);
    if (!rank) {
      ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
    } else {
      ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstartbs+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {

      rvals[0] = bs*(baij->rstartbs + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,((PetscObject)mat)->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}


#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDestroy(baij->colmap);CHKERRQ(ierr);}
#else
  ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->garray);CHKERRQ(ierr);
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
  ierr = PetscFree(baij->barray);CHKERRQ(ierr);
  ierr = PetscFree(baij->hd);CHKERRQ(ierr);
#if defined(PETSC_USE_MAT_SINGLE)
  ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
#endif
  ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
  ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectChangeTypeName((PetscObject)mat,0);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);

  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

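/*
   The multiply above needs a single VecScatter round trip: with M = A + B + B^T
   (A the symmetric diagonal block, B the strictly off-diagonal block),
   y = A x + B x_off + B^T x is assembled by scattering slvec0 = [x; B^T x]
   into slvec1 = [A x; x_off] with ADD_VALUES.  A correctness sketch against an
   equivalent fully-stored MATMPIAIJ matrix (illustrative only, excluded from
   the build; names made up):
*/
#if 0
static PetscErrorCode CheckMultSketch(Mat Msbaij,Mat Maij,Vec x,Vec y1,Vec y2)
{
  PetscErrorCode ierr;
  PetscReal      nrm;

  ierr = MatMult(Msbaij,x,y1);CHKERRQ(ierr); /* symmetric-storage multiply */
  ierr = MatMult(Maij,x,y2);CHKERRQ(ierr);   /* same matrix stored fully */
  ierr = VecAXPY(y2,-1.0,y1);CHKERRQ(ierr);  /* y2 <- y2 - y1 */
  ierr = VecNorm(y2,NORM_2,&nrm);CHKERRQ(ierr);
  ierr = PetscPrintf(PETSC_COMM_WORLD,"||y_sbaij - y_aij|| = %g\n",(double)nrm);CHKERRQ(ierr);
  return 0;
}
#endif
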
#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->cmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->rmap.n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,yy,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->rmap.bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(((PetscObject)A)->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(((PetscObject)A)->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->sMvctx,a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(a->Mvctx,xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->Mvctx,a->lvec,zz,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(a->A,aa);CHKERRQ(ierr);
  ierr = MatScale(a->B,aa);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
  PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;

  PetscFunctionBegin;
  if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;  /* local row index */

  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

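/*
   Usage sketch (illustrative only, excluded from the build): because only the
   upper triangle is stored, MatGetRowUpperTriangular() must bracket row access
   on SBAIJ matrices, and only local rows may be queried.  The exact const-ness
   of the MatGetRow() prototype depends on the PETSc version; the function name
   is made up.
*/
#if 0
static PetscErrorCode PrintRowSketch(Mat A,PetscInt row)
{
  PetscErrorCode ierr;
  PetscInt       nz,j,*cols;
  PetscScalar    *vals;

  ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
  ierr = MatGetRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  for (j=0; j<nz; j++) {
    ierr = PetscPrintf(PETSC_COMM_SELF,"(%D,%D) = %g\n",row,cols[j],(double)PetscRealPart(vals[j]));CHKERRQ(ierr);
  }
  ierr = MatRestoreRow(A,row,&nz,&cols,&vals);CHKERRQ(ierr);
  ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
  return 0;
}
#endif
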
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_TRUE;
  PetscFunctionReturn(0);
}
#undef __FUNCT__
#define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;

  PetscFunctionBegin;
  aA->getrow_utriangular = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRealPart_MPISBAIJ"
PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRealPart(a->A);CHKERRQ(ierr);
  ierr = MatRealPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
  ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)matin->rmap.bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,((PetscObject)matin)->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)A->rmap.N;
  info->columns_global    = (PetscReal)A->cmap.N;
  info->rows_local        = (PetscReal)A->rmap.N;
  info->columns_local     = (PetscReal)A->cmap.N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}

1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1232 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op,PetscTruth flg)
1233 {
1234   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1235   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1236   PetscErrorCode ierr;
1237 
1238   PetscFunctionBegin;
1239   switch (op) {
1240   case MAT_NEW_NONZERO_LOCATIONS:
1241   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1242   case MAT_KEEP_ZEROED_ROWS:
1243   case MAT_NEW_NONZERO_LOCATION_ERR:
1244     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1245     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1246     break;
1247   case MAT_ROW_ORIENTED:
1248     a->roworiented = flg;
1249     ierr = MatSetOption(a->A,op,flg);CHKERRQ(ierr);
1250     ierr = MatSetOption(a->B,op,flg);CHKERRQ(ierr);
1251     break;
1252   case MAT_NEW_DIAGONALS:
1253     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1254     break;
1255   case MAT_IGNORE_OFF_PROC_ENTRIES:
1256     a->donotstash = flg;
1257     break;
1258   case MAT_USE_HASH_TABLE:
1259     a->ht_flag = flg;
1260     break;
1261   case MAT_HERMITIAN:
1262     if (flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1263   case MAT_SYMMETRIC:
1264   case MAT_STRUCTURALLY_SYMMETRIC:
1265   case MAT_SYMMETRY_ETERNAL:
1266     if (!flg) SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1267     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1268     break;
1269   case MAT_IGNORE_LOWER_TRIANGULAR:
1270     aA->ignore_ltriangular = flg;
1271     break;
1272   case MAT_ERROR_LOWER_TRIANGULAR:
1273     aA->ignore_ltriangular = flg;
1274     break;
1275   case MAT_GETROW_UPPERTRIANGULAR:
1276     aA->getrow_utriangular = flg;
1277     break;
1278   default:
1279     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1280   }
1281   PetscFunctionReturn(0);
1282 }
1283 
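/*
   Usage sketch (illustrative only, excluded from the build): the option most
   specific to SBAIJ is MAT_IGNORE_LOWER_TRIANGULAR, which makes MatSetValues()
   silently drop lower-triangular input instead of raising an error.  The
   function name is made up.
*/
#if 0
static PetscErrorCode OptionSketch(Mat A)
{
  PetscErrorCode ierr;

  ierr = MatSetOption(A,MAT_IGNORE_LOWER_TRIANGULAR,PETSC_TRUE);CHKERRQ(ierr);
  /* now both (i,j) and (j,i) may be passed; only entries with i <= j are kept */
  return 0;
}
#endif
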
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

1293 
1294 #undef __FUNCT__
1295 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1296 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1297 {
1298   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1299   Mat            a=baij->A, b=baij->B;
1300   PetscErrorCode ierr;
1301   PetscInt       nv,m,n;
1302   PetscTruth     flg;
1303 
1304   PetscFunctionBegin;
1305   if (ll != rr){
1306     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1307     if (!flg)
1308       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1309   }
1310   if (!ll) PetscFunctionReturn(0);
1311 
1312   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1313   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1314 
1315   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1316   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1317 
1318   ierr = VecScatterBegin(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1319 
1320   /* left diagonalscale the off-diagonal part */
1321   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1322 
1323   /* scale the diagonal part */
1324   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1325 
1326   /* right diagonalscale the off-diagonal part */
1327   ierr = VecScatterEnd(baij->Mvctx,rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
1328   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
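/*
   Usage sketch (illustrative only, excluded from the build): symmetric scaling
   diag(d) M diag(d) keeps the matrix symmetric, which is why SBAIJ requires
   the same vector on both sides of MatDiagonalScale().  The function name is
   made up.
*/
#if 0
static PetscErrorCode ScaleSketch(Mat M,Vec d)
{
  PetscErrorCode ierr;

  ierr = MatDiagonalScale(M,d,d);CHKERRQ(ierr); /* M <- diag(d) M diag(d) */
  return 0;
}
#endif
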
1332 #undef __FUNCT__
1333 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1334 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1335 {
1336   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1337   PetscErrorCode ierr;
1338 
1339   PetscFunctionBegin;
1340   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1341   PetscFunctionReturn(0);
1342 }
1343 
1344 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1345 
1346 #undef __FUNCT__
1347 #define __FUNCT__ "MatEqual_MPISBAIJ"
1348 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1349 {
1350   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1351   Mat            a,b,c,d;
1352   PetscTruth     flg;
1353   PetscErrorCode ierr;
1354 
1355   PetscFunctionBegin;
1356   a = matA->A; b = matA->B;
1357   c = matB->A; d = matB->B;
1358 
1359   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1360   if (flg) {
1361     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1362   }
1363   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,((PetscObject)A)->comm);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatCopy_MPISBAIJ"
1369 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1370 {
1371   PetscErrorCode ierr;
1372   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1373   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1374 
1375   PetscFunctionBegin;
1376   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1377   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1378     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1379     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1380     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1381   } else {
1382     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1383     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1384   }
1385   PetscFunctionReturn(0);
1386 }
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1390 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1391 {
1392   PetscErrorCode ierr;
1393 
1394   PetscFunctionBegin;
1395   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1396   PetscFunctionReturn(0);
1397 }
1398 
1399 #include "petscblaslapack.h"
1400 #undef __FUNCT__
1401 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1402 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1403 {
1404   PetscErrorCode ierr;
1405   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1406   PetscBLASInt   bnz,one=1;
1407   Mat_SeqSBAIJ   *xa,*ya;
1408   Mat_SeqBAIJ    *xb,*yb;
1409 
1410   PetscFunctionBegin;
1411   if (str == SAME_NONZERO_PATTERN) {
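    /* identical nonzero patterns: Y += a*X reduces to one BLAS axpy over the
       stored values of the diagonal (SeqSBAIJ) block and one over the
       off-diagonal (SeqBAIJ) block */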
1412     PetscScalar alpha = a;
1413     xa = (Mat_SeqSBAIJ *)xx->A->data;
1414     ya = (Mat_SeqSBAIJ *)yy->A->data;
1415     bnz = (PetscBLASInt)xa->nz;
1416     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1417     xb = (Mat_SeqBAIJ *)xx->B->data;
1418     yb = (Mat_SeqBAIJ *)yy->B->data;
1419     bnz = (PetscBLASInt)xb->nz;
1420     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1421   } else {
1422     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1423     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1424     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1425   }
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1431 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1432 {
1433   PetscErrorCode ierr;
1434   PetscInt       i;
1435   PetscTruth     flg;
1436 
1437   PetscFunctionBegin;
1438   for (i=0; i<n; i++) {
1439     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1440     if (!flg) {
1441       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1442     }
1443   }
1444   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 
1449 /* -------------------------------------------------------------------*/
1450 static struct _MatOps MatOps_Values = {
1451        MatSetValues_MPISBAIJ,
1452        MatGetRow_MPISBAIJ,
1453        MatRestoreRow_MPISBAIJ,
1454        MatMult_MPISBAIJ,
1455 /* 4*/ MatMultAdd_MPISBAIJ,
1456        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1457        MatMultAdd_MPISBAIJ,
1458        0,
1459        0,
1460        0,
1461 /*10*/ 0,
1462        0,
1463        0,
1464        MatRelax_MPISBAIJ,
1465        MatTranspose_MPISBAIJ,
1466 /*15*/ MatGetInfo_MPISBAIJ,
1467        MatEqual_MPISBAIJ,
1468        MatGetDiagonal_MPISBAIJ,
1469        MatDiagonalScale_MPISBAIJ,
1470        MatNorm_MPISBAIJ,
1471 /*20*/ MatAssemblyBegin_MPISBAIJ,
1472        MatAssemblyEnd_MPISBAIJ,
1473        0,
1474        MatSetOption_MPISBAIJ,
1475        MatZeroEntries_MPISBAIJ,
1476 /*25*/ 0,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1482        0,
1483        0,
1484        0,
1485        0,
1486 /*35*/ MatDuplicate_MPISBAIJ,
1487        0,
1488        0,
1489        0,
1490        0,
1491 /*40*/ MatAXPY_MPISBAIJ,
1492        MatGetSubMatrices_MPISBAIJ,
1493        MatIncreaseOverlap_MPISBAIJ,
1494        MatGetValues_MPISBAIJ,
1495        MatCopy_MPISBAIJ,
1496 /*45*/ 0,
1497        MatScale_MPISBAIJ,
1498        0,
1499        0,
1500        0,
1501 /*50*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*55*/ 0,
1507        0,
1508        MatSetUnfactored_MPISBAIJ,
1509        0,
1510        MatSetValuesBlocked_MPISBAIJ,
1511 /*60*/ 0,
1512        0,
1513        0,
1514        0,
1515        0,
1516 /*65*/ 0,
1517        0,
1518        0,
1519        0,
1520        0,
1521 /*70*/ MatGetRowMaxAbs_MPISBAIJ,
1522        0,
1523        0,
1524        0,
1525        0,
1526 /*75*/ 0,
1527        0,
1528        0,
1529        0,
1530        0,
1531 /*80*/ 0,
1532        0,
1533        0,
1534        0,
1535        MatLoad_MPISBAIJ,
1536 /*85*/ 0,
1537        0,
1538        0,
1539        0,
1540        0,
1541 /*90*/ 0,
1542        0,
1543        0,
1544        0,
1545        0,
1546 /*95*/ 0,
1547        0,
1548        0,
1549        0,
1550        0,
1551 /*100*/0,
1552        0,
1553        0,
1554        0,
1555        0,
1556 /*105*/0,
1557        MatRealPart_MPISBAIJ,
1558        MatImaginaryPart_MPISBAIJ,
1559        MatGetRowUpperTriangular_MPISBAIJ,
1560        MatRestoreRowUpperTriangular_MPISBAIJ
1561 };
1562 
1563 
1564 EXTERN_C_BEGIN
1565 #undef __FUNCT__
1566 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1567 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1568 {
1569   PetscFunctionBegin;
1570   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1571   *iscopy = PETSC_FALSE;
1572   PetscFunctionReturn(0);
1573 }
1574 EXTERN_C_END
1575 
1576 EXTERN_C_BEGIN
1577 #undef __FUNCT__
1578 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1579 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1580 {
1581   Mat_MPISBAIJ   *b;
1582   PetscErrorCode ierr;
1583   PetscInt       i,mbs,Mbs;
1584 
1585   PetscFunctionBegin;
1586   ierr = PetscOptionsBegin(((PetscObject)B)->comm,((PetscObject)B)->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1587     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPIBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
1588   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1589 
1590   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1591   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1592   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1593   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1594   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1595 
1596   B->rmap.bs = B->cmap.bs = bs;
1597   ierr = PetscMapSetUp(&B->rmap);CHKERRQ(ierr);
1598   ierr = PetscMapSetUp(&B->cmap);CHKERRQ(ierr);
1599 
1600   if (d_nnz) {
1601     for (i=0; i<B->rmap.n/bs; i++) {
1602       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1603     }
1604   }
1605   if (o_nnz) {
1606     for (i=0; i<B->rmap.n/bs; i++) {
1607       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1608     }
1609   }
1610   B->preallocated = PETSC_TRUE;
1611 
1612   b   = (Mat_MPISBAIJ*)B->data;
1613   mbs = B->rmap.n/bs;
1614   Mbs = B->rmap.N/bs;
1615   if (mbs*bs != B->rmap.n) {
1616     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
1617   }
1618 
1619   B->rmap.bs  = bs;
1620   b->bs2 = bs*bs;
1621   b->mbs = mbs;
1622   b->nbs = mbs;
1623   b->Mbs = Mbs;
1624   b->Nbs = Mbs;
1625 
1626   for (i=0; i<=b->size; i++) {
1627     b->rangebs[i] = B->rmap.range[i]/bs;
1628   }
1629   b->rstartbs = B->rmap.rstart/bs;
1630   b->rendbs   = B->rmap.rend/bs;
1631 
1632   b->cstartbs = B->cmap.rstart/bs;
1633   b->cendbs   = B->cmap.rend/bs;
1634 
1635   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1636   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1637   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1638   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1639   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1640 
1641   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1642   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1643   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1644   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1645   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1646 
1647   /* build stash cache for off-processor entries generated during assembly */
1648   ierr = MatStashCreate_Private(((PetscObject)B)->comm,bs,&B->bstash);CHKERRQ(ierr);
1649 
1650   PetscFunctionReturn(0);
1651 }
1652 EXTERN_C_END
1653 
1654 /*MC
1655    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1656    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1657 
1658    Options Database Keys:
1659 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1660 
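   Example (a minimal sketch, not from the original source; comm, bs, the sizes
   m and M, and the counts d_nz,o_nz are assumed to be declared already):
.vb
     ierr = MatCreate(comm,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,m,m,M,M);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
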
1661   Level: beginner
1662 
1663 .seealso: MatCreateMPISBAIJ
1664 M*/
1665 
1666 EXTERN_C_BEGIN
1667 #undef __FUNCT__
1668 #define __FUNCT__ "MatCreate_MPISBAIJ"
1669 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1670 {
1671   Mat_MPISBAIJ   *b;
1672   PetscErrorCode ierr;
1673   PetscTruth     flg;
1674 
1675   PetscFunctionBegin;
1676 
1677   ierr    = PetscNewLog(B,Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1678   B->data = (void*)b;
1679   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1680 
1681   B->ops->destroy    = MatDestroy_MPISBAIJ;
1682   B->ops->view       = MatView_MPISBAIJ;
1683   B->mapping    = 0;
1684   B->factor     = 0;
1685   B->assembled  = PETSC_FALSE;
1686 
1687   B->insertmode = NOT_SET_VALUES;
1688   ierr = MPI_Comm_rank(((PetscObject)B)->comm,&b->rank);CHKERRQ(ierr);
1689   ierr = MPI_Comm_size(((PetscObject)B)->comm,&b->size);CHKERRQ(ierr);
1690 
1691   /* build local table of row and column ownerships */
1692   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1693 
1694   /* build stash cache for off-processor entries generated during assembly */
1695   ierr = MatStashCreate_Private(((PetscObject)B)->comm,1,&B->stash);CHKERRQ(ierr);
1696   b->donotstash  = PETSC_FALSE;
1697   b->colmap      = PETSC_NULL;
1698   b->garray      = PETSC_NULL;
1699   b->roworiented = PETSC_TRUE;
1700 
1701 #if defined(PETSC_USE_MAT_SINGLE)
1702   /* stuff for MatSetValues_XXX in single precision */
1703   b->setvalueslen     = 0;
1704   b->setvaluescopy    = PETSC_NULL;
1705 #endif
1706 
1707   /* stuff used in block assembly */
1708   b->barray       = 0;
1709 
1710   /* stuff used for matrix vector multiply */
1711   b->lvec         = 0;
1712   b->Mvctx        = 0;
1713   b->slvec0       = 0;
1714   b->slvec0b      = 0;
1715   b->slvec1       = 0;
1716   b->slvec1a      = 0;
1717   b->slvec1b      = 0;
1718   b->sMvctx       = 0;
1719 
1720   /* stuff for MatGetRow() */
1721   b->rowindices   = 0;
1722   b->rowvalues    = 0;
1723   b->getrowactive = PETSC_FALSE;
1724 
1725   /* hash table stuff */
1726   b->ht           = 0;
1727   b->hd           = 0;
1728   b->ht_size      = 0;
1729   b->ht_flag      = PETSC_FALSE;
1730   b->ht_fact      = 0;
1731   b->ht_total_ct  = 0;
1732   b->ht_insert_ct = 0;
1733 
1734   b->in_loc       = 0;
1735   b->v_loc        = 0;
1736   b->n_loc        = 0;
1737   ierr = PetscOptionsBegin(((PetscObject)B)->comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 1","Mat");CHKERRQ(ierr);
1738     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1739     if (flg) {
1740       PetscReal fact = 1.39;
1741       ierr = MatSetOption(B,MAT_USE_HASH_TABLE,PETSC_TRUE);CHKERRQ(ierr);
1742       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1743       if (fact <= 1.0) fact = 1.39;
1744       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1745       ierr = PetscInfo1(B,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1746     }
1747   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1748 
1749   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1750                                      "MatStoreValues_MPISBAIJ",
1751                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1752   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1753                                      "MatRetrieveValues_MPISBAIJ",
1754                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1755   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1756                                      "MatGetDiagonalBlock_MPISBAIJ",
1757                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1758   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1759                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1760                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1761   B->symmetric                  = PETSC_TRUE;
1762   B->structurally_symmetric     = PETSC_TRUE;
1763   B->symmetric_set              = PETSC_TRUE;
1764   B->structurally_symmetric_set = PETSC_TRUE;
1765   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 EXTERN_C_END
1769 
1770 /*MC
1771    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1772 
1773    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1774    and MATMPISBAIJ otherwise.
1775 
1776    Options Database Keys:
1777 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1778 
1779   Level: beginner
1780 
1781 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1782 M*/
1783 
1784 EXTERN_C_BEGIN
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatCreate_SBAIJ"
1787 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1788 {
1789   PetscErrorCode ierr;
1790   PetscMPIInt    size;
1791 
1792   PetscFunctionBegin;
1793   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
1794   if (size == 1) {
1795     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1796   } else {
1797     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1798   }
1799   PetscFunctionReturn(0);
1800 }
1801 EXTERN_C_END
1802 
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1805 /*@C
1806    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1807    the user should preallocate the matrix storage by setting the parameters
1808    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1809    performance can be increased by more than a factor of 50.
1810 
1811    Collective on Mat
1812 
1813    Input Parameters:
1814 +  A - the matrix
1815 .  bs   - size of block
1816 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1817            submatrix  (same for all local rows)
1818 .  d_nnz - array containing the number of block nonzeros in the various block rows
1819            in the upper triangular and diagonal part of the diagonal portion of the
1820            local submatrix (possibly different for each block row) or PETSC_NULL.
1821            You must leave room for the diagonal entry even if it is zero.
1822 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1823            submatrix (same for all local rows).
1824 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1825            off-diagonal portion of the local submatrix (possibly different for
1826            each block row) or PETSC_NULL.
1827 
1828 
1829    Options Database Keys:
1830 .   -mat_no_unroll - uses code that does not unroll the loops in the
1831                      block calculations (much slower)
1832 .   -mat_block_size - size of the blocks to use
1833 
1834    Notes:
1835 
1836    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1837    then it must be used on all processors that share the object for that argument.
1838 
1839    If the *_nnz parameter is given then the *_nz parameter is ignored
1840 
1841    Storage Information:
1842    For a square global matrix we define each processor's diagonal portion
1843    to be its local rows and the corresponding columns (a square submatrix);
1844    each processor's off-diagonal portion encompasses the remainder of the
1845    local matrix (a rectangular submatrix).
1846 
1847    The user can specify preallocated storage for the diagonal part of
1848    the local submatrix with either d_nz or d_nnz (not both).  Set
1849    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1850    memory allocation.  Likewise, specify preallocated storage for the
1851    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1852 
1853    You can call MatGetInfo() to get information on how effective the preallocation was,
1854    for example the fields mallocs, nz_allocated, nz_used, and nz_unneeded.
1855    You can also run with the option -info and look for messages with the string
1856    malloc in them to see if additional memory allocation was needed.
1857 
1858    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1859    the figure below we depict these three local rows and all columns (0-11).
1860 
1861 .vb
1862            0 1 2 3 4 5 6 7 8 9 10 11
1863           -------------------
1864    row 3  |  o o o d d d o o o o o o
1865    row 4  |  o o o d d d o o o o o o
1866    row 5  |  o o o d d d o o o o o o
1867           -------------------
1868 .ve
1869 
1870    Thus, any entries in the d locations are stored in the d (diagonal)
1871    submatrix, and any entries in the o locations are stored in the
1872    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1873    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1874 
1875    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1876    plus the diagonal part of the d matrix,
1877    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1878    In general, for PDE problems in which most nonzeros are near the diagonal,
1879    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1880    or you will get TERRIBLE performance; see the users' manual chapter on
1881    matrices.
1882 
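   For instance, for the three rows depicted above (a worked sketch assuming
   bs=1 and every depicted slot nonzero): because only the upper triangle is
   stored, one would use
.vb
     d_nnz = [3, 2, 1]   (row 3: cols 3-5; row 4: cols 4-5; row 5: col 5)
     o_nnz = [6, 6, 6]   (cols 6-11; the o entries in cols 0-2 lie in the
                          lower triangle and are not stored)
.ve
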
1883    Level: intermediate
1884 
1885 .keywords: matrix, block, aij, compressed row, sparse, parallel
1886 
1887 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1888 @*/
1889 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1890 {
1891   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1892 
1893   PetscFunctionBegin;
1894   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1895   if (f) {
1896     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1897   }
1898   PetscFunctionReturn(0);
1899 }
1900 
1901 #undef __FUNCT__
1902 #define __FUNCT__ "MatCreateMPISBAIJ"
1903 /*@C
1904    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1905    (block compressed row).  For good matrix assembly performance
1906    the user should preallocate the matrix storage by setting the parameters
1907    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1908    performance can be increased by more than a factor of 50.
1909 
1910    Collective on MPI_Comm
1911 
1912    Input Parameters:
1913 +  comm - MPI communicator
1914 .  bs   - size of block
1915 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1916            This value should be the same as the local size used in creating the
1917            y vector for the matrix-vector product y = Ax.
1918 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1919            This value should be the same as the local size used in creating the
1920            x vector for the matrix-vector product y = Ax.
1921 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1922 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1923 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1924            submatrix  (same for all local rows)
1925 .  d_nnz - array containing the number of block nonzeros in the various block rows
1926            in the upper triangular portion of the diagonal part of the local
1927            submatrix (possibly different for each block row) or PETSC_NULL.
1928            You must leave room for the diagonal entry even if it is zero.
1929 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1930            submatrix (same for all local rows).
1931 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1932            off-diagonal portion of the local submatrix (possibly different for
1933            each block row) or PETSC_NULL.
1934 
1935    Output Parameter:
1936 .  A - the matrix
1937 
1938    Options Database Keys:
1939 .   -mat_no_unroll - uses code that does not unroll the loops in the
1940                      block calculations (much slower)
1941 .   -mat_block_size - size of the blocks to use
1942 .   -mat_mpi - use the parallel matrix data structures even on one processor
1943                (defaults to using SeqBAIJ format on one processor)
1944 
1945    It is recommended that one use the MatCreate(), MatSetType() and/or MatSetFromOptions(),
1946    MatXXXXSetPreallocation() paradigm instead of calling this routine directly. This is
1947    especially true if you plan to use external direct solvers such as SuperLU, MUMPS or Spooles.
1948    [MatXXXXSetPreallocation() is, for example, MatSeqAIJSetPreallocation()]
1949 
1950    Notes:
1951    The number of rows and columns must be divisible by blocksize.
1952    This matrix type does not support complex Hermitian operations.
1953 
1954    The user MUST specify either the local or global matrix dimensions
1955    (possibly both).
1956 
1957    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1958    then it must be used on all processors that share the object for that argument.
1959 
1960    If the *_nnz parameter is given then the *_nz parameter is ignored
1961 
1962    Storage Information:
1963    For a square global matrix we define each processor's diagonal portion
1964    to be its local rows and the corresponding columns (a square submatrix);
1965    each processor's off-diagonal portion encompasses the remainder of the
1966    local matrix (a rectangular submatrix).
1967 
1968    The user can specify preallocated storage for the diagonal part of
1969    the local submatrix with either d_nz or d_nnz (not both).  Set
1970    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1971    memory allocation.  Likewise, specify preallocated storage for the
1972    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1973 
1974    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1975    the figure below we depict these three local rows and all columns (0-11).
1976 
1977 .vb
1978            0 1 2 3 4 5 6 7 8 9 10 11
1979           -------------------
1980    row 3  |  o o o d d d o o o o o o
1981    row 4  |  o o o d d d o o o o o o
1982    row 5  |  o o o d d d o o o o o o
1983           -------------------
1984 .ve
1985 
1986    Thus, any entries in the d locations are stored in the d (diagonal)
1987    submatrix, and any entries in the o locations are stored in the
1988    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1989    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1990 
1991    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1992    plus the diagonal part of the d matrix,
1993    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1994    In general, for PDE problems in which most nonzeros are near the diagonal,
1995    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1996    or you will get TERRIBLE performance; see the users' manual chapter on
1997    matrices.
1998 
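   Example (a minimal sketch; comm, bs, the sizes, and the preallocation counts
   are assumed to be set up already):
.vb
     ierr = MatCreateMPISBAIJ(comm,bs,m,m,M,M,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
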
1999    Level: intermediate
2000 
2001 .keywords: matrix, block, aij, compressed row, sparse, parallel
2002 
2003 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2004 @*/
2005 
2006 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2007 {
2008   PetscErrorCode ierr;
2009   PetscMPIInt    size;
2010 
2011   PetscFunctionBegin;
2012   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2013   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2014   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2015   if (size > 1) {
2016     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2017     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2018   } else {
2019     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2020     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2021   }
2022   PetscFunctionReturn(0);
2023 }
2024 
2025 
2026 #undef __FUNCT__
2027 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2028 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2029 {
2030   Mat            mat;
2031   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2032   PetscErrorCode ierr;
2033   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2034   PetscScalar    *array;
2035 
2036   PetscFunctionBegin;
2037   *newmat       = 0;
2038   ierr = MatCreate(((PetscObject)matin)->comm,&mat);CHKERRQ(ierr);
2039   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2040   ierr = MatSetType(mat,((PetscObject)matin)->type_name);CHKERRQ(ierr);
2041   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2042   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2043   ierr = PetscMapCopy(((PetscObject)matin)->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2044 
2045   mat->factor       = matin->factor;
2046   mat->preallocated = PETSC_TRUE;
2047   mat->assembled    = PETSC_TRUE;
2048   mat->insertmode   = NOT_SET_VALUES;
2049 
2050   a = (Mat_MPISBAIJ*)mat->data;
2051   a->bs2   = oldmat->bs2;
2052   a->mbs   = oldmat->mbs;
2053   a->nbs   = oldmat->nbs;
2054   a->Mbs   = oldmat->Mbs;
2055   a->Nbs   = oldmat->Nbs;
2056 
2057 
2058   a->size         = oldmat->size;
2059   a->rank         = oldmat->rank;
2060   a->donotstash   = oldmat->donotstash;
2061   a->roworiented  = oldmat->roworiented;
2062   a->rowindices   = 0;
2063   a->rowvalues    = 0;
2064   a->getrowactive = PETSC_FALSE;
2065   a->barray       = 0;
2066   a->rstartbs    = oldmat->rstartbs;
2067   a->rendbs      = oldmat->rendbs;
2068   a->cstartbs    = oldmat->cstartbs;
2069   a->cendbs      = oldmat->cendbs;
2070 
2071   /* hash table stuff */
2072   a->ht           = 0;
2073   a->hd           = 0;
2074   a->ht_size      = 0;
2075   a->ht_flag      = oldmat->ht_flag;
2076   a->ht_fact      = oldmat->ht_fact;
2077   a->ht_total_ct  = 0;
2078   a->ht_insert_ct = 0;
2079 
2080   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2081   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,1,&mat->stash);CHKERRQ(ierr);
2082   ierr = MatStashCreate_Private(((PetscObject)matin)->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2083   if (oldmat->colmap) {
2084 #if defined (PETSC_USE_CTABLE)
2085     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2086 #else
2087     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2088     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2089     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2090 #endif
2091   } else a->colmap = 0;
2092 
2093   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2094     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2095     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2096     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2097   } else a->garray = 0;
2098 
2099   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2100   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2101   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2102   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2103 
2104   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2105   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2106   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2107   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2108 
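  /* slvec1a/slvec1b (and slvec0b below) are created as views into the first
     bs*mbs and the remaining nt-bs*mbs entries of the parent vector's array,
     so no additional value storage is allocated for them */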
2109   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2110   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2111   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2113   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2114   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2115   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2116   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2117   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2118   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2119   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2122 
2123   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2124   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2125   a->sMvctx = oldmat->sMvctx;
2126   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2127 
2128   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2129   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2130   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2131   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2132   ierr = PetscFListDuplicate(((PetscObject)matin)->qlist,&((PetscObject)mat)->qlist);CHKERRQ(ierr);
2133   *newmat = mat;
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #include "petscsys.h"
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatLoad_MPISBAIJ"
2141 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2142 {
2143   Mat            A;
2144   PetscErrorCode ierr;
2145   PetscInt       i,nz,j,rstart,rend;
2146   PetscScalar    *vals,*buf;
2147   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2148   MPI_Status     status;
2149   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2150   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2151   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2152   PetscInt       bs=1,Mbs,mbs,extra_rows;
2153   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2154   PetscInt       dcount,kmax,k,nzcount,tmp;
2155   int            fd;
2156 
2157   PetscFunctionBegin;
2158   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix 2","Mat");CHKERRQ(ierr);
2159     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2160   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2161 
2162   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2163   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2164   if (!rank) {
2165     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2166     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2167     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2168     if (header[3] < 0) {
2169       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2170     }
2171   }
2172 
2173   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2174   M = header[1]; N = header[2];
2175 
2176   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2177 
2178   /*
2179      This code adds extra rows to make sure the number of rows is
2180      divisible by the blocksize
2181   */
2182   Mbs        = M/bs;
2183   extra_rows = bs - M + bs*(Mbs);
2184   if (extra_rows == bs) extra_rows = 0;
2185   else                  Mbs++;
2186   if (extra_rows && !rank) {
2187     ierr = PetscInfo(viewer,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2188   }
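  /* illustration (not in the original source): M = 10, bs = 4 gives Mbs = 2 and
     extra_rows = 4 - 10 + 8 = 2, so Mbs becomes 3 and the matrix is padded to
     12 rows; the padded rows receive a single unit diagonal entry below */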
2189 
2190   /* determine ownership of all rows */
2191   mbs        = Mbs/size + ((Mbs % size) > rank);
2192   m          = mbs*bs;
2193   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2194   browners   = rowners + size + 1;
2195   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2196   rowners[0] = 0;
2197   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2198   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2199   rstart = rowners[rank];
2200   rend   = rowners[rank+1];
2201 
2202   /* distribute row lengths to all processors */
2203   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2204   if (!rank) {
2205     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2206     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2207     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2208     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2209     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2210     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2211     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2212   } else {
2213     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2214   }
2215 
2216   if (!rank) {   /* procs[0] */
2217     /* calculate the number of nonzeros on each processor */
2218     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2219     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2220     for (i=0; i<size; i++) {
2221       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2222         procsnz[i] += rowlengths[j];
2223       }
2224     }
2225     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2226 
2227     /* determine max buffer needed and allocate it */
2228     maxnz = 0;
2229     for (i=0; i<size; i++) {
2230       maxnz = PetscMax(maxnz,procsnz[i]);
2231     }
2232     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2233 
2234     /* read in my part of the matrix column indices  */
2235     nz     = procsnz[0];
2236     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2237     mycols = ibuf;
2238     if (size == 1)  nz -= extra_rows;
2239     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2240     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2241 
2242     /* read in everyone else's part (except the last proc's) and ship it off */
2243     for (i=1; i<size-1; i++) {
2244       nz   = procsnz[i];
2245       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2246       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2247     }
2248     /* read in the stuff for the last proc */
2249     if (size != 1) {
2250       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2251       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2252       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2253       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2254     }
2255     ierr = PetscFree(cols);CHKERRQ(ierr);
2256   } else {  /* procs[i], i>0 */
2257     /* determine buffer space needed for message */
2258     nz = 0;
2259     for (i=0; i<m; i++) {
2260       nz += locrowlens[i];
2261     }
2262     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2263     mycols = ibuf;
2264     /* receive message of column indices */
2265     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2266     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2267     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2268   }
2269 
2270   /* loop over local rows, determining number of off diagonal entries */
2271   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2272   odlens   = dlens + (rend-rstart);
2273   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2274   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2275   masked1  = mask    + Mbs;
2276   masked2  = masked1 + Mbs;
2277   rowcount = 0; nzcount = 0;
2278   for (i=0; i<mbs; i++) {
2279     dcount  = 0;
2280     odcount = 0;
2281     for (j=0; j<bs; j++) {
2282       kmax = locrowlens[rowcount];
2283       for (k=0; k<kmax; k++) {
2284         tmp = mycols[nzcount++]/bs; /* block col. index */
2285         if (!mask[tmp]) {
2286           mask[tmp] = 1;
2287           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2288           else masked1[dcount++] = tmp; /* entry in diag portion */
2289         }
2290       }
2291       rowcount++;
2292     }
2293 
2294     dlens[i]  = dcount;  /* d_nzz[i] */
2295     odlens[i] = odcount; /* o_nzz[i] */
2296 
2297     /* zero out the mask elements we set */
2298     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2299     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2300   }
2301 
2302   /* create our matrix */
2303   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2304   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2305   ierr = MatSetType(A,type);CHKERRQ(ierr);
2306   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2307 
2308   if (!rank) {
2309     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2310     /* read in my part of the matrix numerical values  */
2311     nz = procsnz[0];
2312     vals = buf;
2313     mycols = ibuf;
2314     if (size == 1)  nz -= extra_rows;
2315     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2316     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2317 
2318     /* insert into matrix */
2319     jj      = rstart*bs;
2320     for (i=0; i<m; i++) {
2321       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2322       mycols += locrowlens[i];
2323       vals   += locrowlens[i];
2324       jj++;
2325     }
2326 
2327     /* read in other processors (except the last one) and ship out */
2328     for (i=1; i<size-1; i++) {
2329       nz   = procsnz[i];
2330       vals = buf;
2331       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2332       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2333     }
2334     /* the last proc */
2335     if (size != 1){
2336       nz   = procsnz[i] - extra_rows;   /* i equals size-1 after the loop above; the extra rows are not on disk */
2337       vals = buf;
2338       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2339       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2340       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,((PetscObject)A)->tag,comm);CHKERRQ(ierr);
2341     }
2342     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2343 
2344   } else {
2345     /* receive numeric values */
2346     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2347 
2348     /* receive message of values */
2349     vals   = buf;
2350     mycols = ibuf;
2351     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,((PetscObject)A)->tag,comm,&status);CHKERRQ(ierr);
2352     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2353     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2354 
2355     /* insert into matrix */
2356     jj      = rstart*bs;
2357     for (i=0; i<m; i++) {
2358       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2359       mycols += locrowlens[i];
2360       vals   += locrowlens[i];
2361       jj++;
2362     }
2363   }
2364 
2365   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2366   ierr = PetscFree(buf);CHKERRQ(ierr);
2367   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2368   ierr = PetscFree(rowners);CHKERRQ(ierr);
2369   ierr = PetscFree(dlens);CHKERRQ(ierr);
2370   ierr = PetscFree(mask);CHKERRQ(ierr);
2371   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2372   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2373   *newmat = A;
2374   PetscFunctionReturn(0);
2375 }
2376 
2377 #undef __FUNCT__
2378 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2379 /*XXXXX@
2380    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2381 
2382    Input Parameters:
2383 .  mat  - the matrix
2384 .  fact - factor
2385 
2386    Collective on Mat
2387 
2388    Level: advanced
2389 
2390   Notes:
2391    This can also be set by the command line option: -mat_use_hash_table fact
2392 
2393 .keywords: matrix, hashtable, factor, HT
2394 
2395 .seealso: MatSetOption()
2396 @XXXXX*/
2397 
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatGetRowMaxAbs_MPISBAIJ"
2401 PetscErrorCode MatGetRowMaxAbs_MPISBAIJ(Mat A,Vec v,PetscInt idx[])
2402 {
2403   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2404   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2405   PetscReal      atmp;
2406   PetscReal      *work,*svalues,*rvalues;
2407   PetscErrorCode ierr;
2408   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2409   PetscMPIInt    rank,size;
2410   PetscInt       *rowners_bs,dest,count,source;
2411   PetscScalar    *va;
2412   MatScalar      *ba;
2413   MPI_Status     stat;
2414 
2415   PetscFunctionBegin;
2416   if (idx) SETERRQ(PETSC_ERR_SUP,"Send email to petsc-maint@mcs.anl.gov");
2417   ierr = MatGetRowMaxAbs(a->A,v,PETSC_NULL);CHKERRQ(ierr);
2418   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2419 
2420   ierr = MPI_Comm_size(((PetscObject)A)->comm,&size);CHKERRQ(ierr);
2421   ierr = MPI_Comm_rank(((PetscObject)A)->comm,&rank);CHKERRQ(ierr);
2422 
2423   bs   = A->rmap.bs;
2424   mbs  = a->mbs;
2425   Mbs  = a->Mbs;
2426   ba   = b->a;
2427   bi   = b->i;
2428   bj   = b->j;
2429 
2430   /* find ownerships */
2431   rowners_bs = A->rmap.range;
2432 
2433   /* each proc creates an array to be distributed */
2434   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2435   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2436 
2437   /* row_max for B */
2438   if (rank != size-1){
2439     for (i=0; i<mbs; i++) {
2440       ncols = bi[1] - bi[0]; bi++;
2441       brow  = bs*i;
2442       for (j=0; j<ncols; j++){
2443         bcol = bs*(*bj);
2444         for (kcol=0; kcol<bs; kcol++){
2445           col = bcol + kcol;                 /* local col index */
2446           col += rowners_bs[rank+1];      /* global col index */
2447           for (krow=0; krow<bs; krow++){
2448             atmp = PetscAbsScalar(*ba); ba++;
2449             row = brow + krow;    /* local row index */
2450             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2451             if (work[col] < atmp) work[col] = atmp;
2452           }
2453         }
2454         bj++;
2455       }
2456     }
2457 
2458     /* send values to their owners */
2459     for (dest=rank+1; dest<size; dest++){
2460       svalues = work + rowners_bs[dest];
2461       count   = rowners_bs[dest+1]-rowners_bs[dest];
2462       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,((PetscObject)A)->comm);CHKERRQ(ierr);
2463     }
2464   }
2465 
2466   /* receive values */
2467   if (rank){
2468     rvalues = work;
2469     count   = rowners_bs[rank+1]-rowners_bs[rank];
2470     for (source=0; source<rank; source++){
2471       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,((PetscObject)A)->comm,&stat);CHKERRQ(ierr);
2472       /* process values */
2473       for (i=0; i<count; i++){
2474         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2475       }
2476     }
2477   }
2478 
2479   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2480   ierr = PetscFree(work);CHKERRQ(ierr);
2481   PetscFunctionReturn(0);
2482 }
2483 
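/*
   Usage sketch (illustration; A and v are assumed to exist): each local entry
   of v receives the largest absolute value in the corresponding row of the
   symmetrically completed matrix; the index argument is not supported here:

     ierr = MatGetRowMaxAbs(A,v,PETSC_NULL);CHKERRQ(ierr);
*/
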
2484 #undef __FUNCT__
2485 #define __FUNCT__ "MatRelax_MPISBAIJ"
2486 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2487 {
2488   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2489   PetscErrorCode ierr;
2490   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2491   PetscScalar    *x,*b,*ptr,zero=0.0;
2492   Vec            bb1;
2493 
2494   PetscFunctionBegin;
2495   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2496   if (bs > 1)
2497     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2498 
2499   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2500     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2501       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2502       its--;
2503     }
2504 
2505     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2506     while (its--){
2507 
2508       /* lower triangular part: slvec0b = - B^T*xx */
2509       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2510 
2511       /* copy xx into slvec0a */
2512       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2513       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2514       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2515       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2516 
2517       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2518 
2519       /* copy bb into slvec1a */
2520       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2521       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2522       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2523       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2524 
2525       /* set slvec1b = 0 */
2526       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2527 
2528       ierr = VecScatterBegin(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2529       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2530       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2531       ierr = VecScatterEnd(mat->sMvctx,mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2532 
2533       /* upper triangular part: bb1 = bb1 - B*x */
2534       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2535 
2536       /* local diagonal sweep */
2537       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2538     }
2539     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2540   } else {
2541     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2542   }
2543   PetscFunctionReturn(0);
2544 }
2545 
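/*
   MatRelax_MPISBAIJ above (and the 2comm variant below) are typically reached
   through the SOR preconditioner; a hedged sketch of the options that select
   the supported sweep:

     -pc_type sor -pc_sor_local_symmetric

   Only (local) symmetric sweeps are implemented for this format, as the checks
   above enforce.
*/
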
2546 #undef __FUNCT__
2547 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2548 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2549 {
2550   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2551   PetscErrorCode ierr;
2552   Vec            lvec1,bb1;
2553 
2554   PetscFunctionBegin;
2555   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2556   if (matin->rmap.bs > 1)
2557     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2558 
2559   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2560     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2561       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2562       its--;
2563     }
2564 
2565     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2566     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2567     while (its--){
2568       ierr = VecScatterBegin(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2569 
2570       /* lower diagonal part: bb1 = bb - B^T*xx */
2571       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2572       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2573 
2574       ierr = VecScatterEnd(mat->Mvctx,xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr);
2575       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2576       ierr = VecScatterBegin(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2577 
2578       /* upper diagonal part: bb1 = bb1 - B*x */
2579       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2580       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2581 
2582       ierr = VecScatterEnd(mat->Mvctx,lvec1,bb1,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr);
2583 
2584       /* diagonal sweep */
2585       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2586     }
2587     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2588     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2589   } else {
2590     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2591   }
2592   PetscFunctionReturn(0);
2593 }
2594 
2595