xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 906ed7cc33fecbafab01746eec64dcdcc8a4842f)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the ..._MatScalar() functions do not exist as separate
25    routines; the _MatScalar names are simply #defined to the plain routines below.
26    Otherwise ..._MatScalar() takes matrix elements already in single precision and
27    inserts them into the single precision data structures, while MatSetValuesBlocked_MPISBAIJ()
28    converts the entries from PetscScalar to single precision and then calls ..._MatScalar()
29    to put them into the single precision data structures.
30 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
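/*
   Usage sketch for the two routines above (illustrative only; 'mat' is any
   assembled MPISBAIJ matrix whose nonzero structure will not change):

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     // fill and assemble the matrix once, then save the numerical values
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     // ... later, restore the saved values instead of recomputing them,
     // then change only the entries that differ and reassemble
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/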
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \
103       N = nrow++ - 1;  \
104       /* shift up all the later entries in this row */ \
105       for (ii=N; ii>=_i; ii--) { \
106         rp[ii+1] = rp[ii]; \
107         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
108       } \
109       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
110       rp[_i]                      = bcol;  \
111       ap[bs2*_i + bs*cidx + ridx] = value;  \
112       a_noinsert:; \
113     ailen[brow] = nrow; \
114 }
115 #ifndef MatSetValues_SeqSBAIJ_B_Private
116 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
117 { \
118     brow = row/bs;  \
119     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
120     rmax = bimax[brow]; nrow = bilen[brow]; \
121       bcol = col/bs; \
122       ridx = row % bs; cidx = col % bs; \
123       low = 0; high = nrow; \
124       while (high-low > 3) { \
125         t = (low+high)/2; \
126         if (rp[t] > bcol) high = t; \
127         else              low  = t; \
128       } \
129       for (_i=low; _i<high; _i++) { \
130         if (rp[_i] > bcol) break; \
131         if (rp[_i] == bcol) { \
132           bap  = ap +  bs2*_i + bs*cidx + ridx; \
133           if (addv == ADD_VALUES) *bap += value;  \
134           else                    *bap  = value;  \
135           goto b_noinsert; \
136         } \
137       } \
138       if (b->nonew == 1) goto b_noinsert; \
139       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
140       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       b_noinsert:; \
151     bilen[brow] = nrow; \
152 }
153 #endif
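/*
   Both macros above locate a block column by bisecting until the search
   window is small (<= 3 entries) and then scanning linearly. A minimal
   standalone sketch of that search (hypothetical helper, not used here):

     static PetscInt FindBlockCol(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {              // bisect down to a tiny window
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {          // finish with a linear scan
         if (rp[i] == bcol) return i;      // found: index of the block
         if (rp[i] > bcol) break;
       }
       return -(i+1);                      // not found: encodes the insertion point
     }
*/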
154 
155 #if defined(PETSC_USE_MAT_SINGLE)
156 #undef __FUNCT__
157 #define __FUNCT__ "MatSetValues_MPISBAIJ"
158 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
159 {
160   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
161   PetscErrorCode ierr;
162   PetscInt       i,N = m*n;
163   MatScalar      *vsingle;
164 
165   PetscFunctionBegin;
166   if (N > b->setvalueslen) {
167     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
168     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
169     b->setvalueslen  = N;
170   }
171   vsingle = b->setvaluescopy;
172 
173   for (i=0; i<N; i++) {
174     vsingle[i] = v[i];
175   }
176   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
182 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
183 {
184   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
185   PetscErrorCode ierr;
186   PetscInt       i,N = m*n*b->bs2;
187   MatScalar      *vsingle;
188 
189   PetscFunctionBegin;
190   if (N > b->setvalueslen) {
191     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
192     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
193     b->setvalueslen  = N;
194   }
195   vsingle = b->setvaluescopy;
196   for (i=0; i<N; i++) {
197     vsingle[i] = v[i];
198   }
199   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 #endif
203 
204 /* Only add/insert a(i,j) with i<=j (blocks).
205    Any a(i,j) with i>j input by the user is ignored.
206 */
207 #undef __FUNCT__
208 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
209 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
210 {
211   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
212   MatScalar      value;
213   PetscTruth     roworiented = baij->roworiented;
214   PetscErrorCode ierr;
215   PetscInt       i,j,row,col;
216   PetscInt       rstart_orig=mat->rmap.rstart;
217   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
218   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
219 
220   /* Some Variables required in the macro */
221   Mat            A = baij->A;
222   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
223   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224   MatScalar      *aa=a->a;
225 
226   Mat            B = baij->B;
227   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
228   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229   MatScalar     *ba=b->a;
230 
231   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
232   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
233   MatScalar     *ap,*bap;
234 
235   /* for stash */
236   PetscInt      n_loc, *in_loc = PETSC_NULL;
237   MatScalar     *v_loc = PETSC_NULL;
238 
239   PetscFunctionBegin;
240 
241   if (!baij->donotstash){
242     if (n > baij->n_loc) {
243       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
244       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
245       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
246       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
247       baij->n_loc = n;
248     }
249     in_loc = baij->in_loc;
250     v_loc  = baij->v_loc;
251   }
252 
253   for (i=0; i<m; i++) {
254     if (im[i] < 0) continue;
255 #if defined(PETSC_USE_DEBUG)
256     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
257 #endif
258     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259       row = im[i] - rstart_orig;              /* local row index */
260       for (j=0; j<n; j++) {
261         if (im[i]/bs > in[j]/bs){
262           if (a->ignore_ltriangular){
263             continue;    /* ignore lower triangular blocks */
264           } else {
265             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
266           }
267         }
268         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
269           col = in[j] - cstart_orig;          /* local col index */
270           brow = row/bs; bcol = col/bs;
271           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
272           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
274           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
275         } else if (in[j] < 0) continue;
276 #if defined(PETSC_USE_DEBUG)
277         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
278 #endif
279         else {  /* off-diag entry (B) */
280           if (mat->was_assembled) {
281             if (!baij->colmap) {
282               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
283             }
284 #if defined (PETSC_USE_CTABLE)
285             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
286             col  = col - 1;
287 #else
288             col = baij->colmap[in[j]/bs] - 1;
289 #endif
290             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
291               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
292               col =  in[j];
293               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
294               B = baij->B;
295               b = (Mat_SeqBAIJ*)(B)->data;
296               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
297               ba=b->a;
298             } else col += in[j]%bs;
299           } else col = in[j];
300           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
301           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
302           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
303         }
304       }
305     } else {  /* off processor entry */
306       if (!baij->donotstash) {
307         n_loc = 0;
308         for (j=0; j<n; j++){
309           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
310           in_loc[n_loc] = in[j];
311           if (roworiented) {
312             v_loc[n_loc] = v[i*n+j];
313           } else {
314             v_loc[n_loc] = v[j*m+i];
315           }
316           n_loc++;
317         }
318         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
319       }
320     }
321   }
322   PetscFunctionReturn(0);
323 }
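/*
   Calling convention enforced above, as a sketch (i, j, v are hypothetical
   global indices/value owned by this process): only entries with
   block-row <= block-column are stored; lower-triangular input is either
   rejected or, with the option below, silently dropped.

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr);
     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/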
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
327 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
328 {
329   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
330   const MatScalar *value;
331   MatScalar       *barray=baij->barray;
332   PetscTruth      roworiented = baij->roworiented;
333   PetscErrorCode  ierr;
334   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
335   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
336   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
337 
338   PetscFunctionBegin;
339   if(!barray) {
340     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
341     baij->barray = barray;
342   }
343 
344   if (roworiented) {
345     stepval = (n-1)*bs;
346   } else {
347     stepval = (m-1)*bs;
348   }
349   for (i=0; i<m; i++) {
350     if (im[i] < 0) continue;
351 #if defined(PETSC_USE_DEBUG)
352     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
353 #endif
354     if (im[i] >= rstart && im[i] < rend) {
355       row = im[i] - rstart;
356       for (j=0; j<n; j++) {
357         /* If n == 1 (row-oriented) or m == 1 (column-oriented) the input block can be used in place; no copy is required */
358         if ((roworiented) && (n == 1)) {
359           barray = (MatScalar*) v + i*bs2;
360         } else if((!roworiented) && (m == 1)) {
361           barray = (MatScalar*) v + j*bs2;
362         } else { /* Here a copy is required */
363           if (roworiented) {
364             value = v + i*(stepval+bs)*bs + j*bs;
365           } else {
366             value = v + j*(stepval+bs)*bs + i*bs;
367           }
368           for (ii=0; ii<bs; ii++,value+=stepval) {
369             for (jj=0; jj<bs; jj++) {
370               *barray++  = *value++;
371             }
372           }
373           barray -=bs2;
374         }
375 
376         if (in[j] >= cstart && in[j] < cend){
377           col  = in[j] - cstart;
378           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
379         }
380         else if (in[j] < 0) continue;
381 #if defined(PETSC_USE_DEBUG)
382         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
383 #endif
384         else {
385           if (mat->was_assembled) {
386             if (!baij->colmap) {
387               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388             }
389 
390 #if defined(PETSC_USE_DEBUG)
391 #if defined (PETSC_USE_CTABLE)
392             { PetscInt data;
393               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
394               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
395             }
396 #else
397             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
398 #endif
399 #endif
400 #if defined (PETSC_USE_CTABLE)
401 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
402             col  = (col - 1)/bs;
403 #else
404             col = (baij->colmap[in[j]] - 1)/bs;
405 #endif
406             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
407               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
408               col =  in[j];
409             }
410           }
411           else col = in[j];
412           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
413         }
414       }
415     } else {
416       if (!baij->donotstash) {
417         if (roworiented) {
418           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
419         } else {
420           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
421         }
422       }
423     }
424   }
425   PetscFunctionReturn(0);
426 }
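/*
   Sketch of the value layout consumed above, for bs = 2 and one row-oriented
   block: v holds the bs*bs entries of block (ib,jb) with v[r*bs+c] being
   entry (r,c) of the block, and ib <= jb for the symmetric format (ib and jb
   are hypothetical block indices owned by this process):

     PetscInt    ib = 0,jb = 1;
     PetscScalar v[4] = {1.0,2.0,     // first row of the 2x2 block
                         3.0,4.0};    // second row
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,v,INSERT_VALUES);CHKERRQ(ierr);
*/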
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatGetValues_MPISBAIJ"
430 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
431 {
432   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
433   PetscErrorCode ierr;
434   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
435   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
436 
437   PetscFunctionBegin;
438   for (i=0; i<m; i++) {
439     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
440     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
441     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
442       row = idxm[i] - bsrstart;
443       for (j=0; j<n; j++) {
444         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
445         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
446         if (idxn[j] >= bscstart && idxn[j] < bscend){
447           col = idxn[j] - bscstart;
448           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
449         } else {
450           if (!baij->colmap) {
451             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
452           }
453 #if defined (PETSC_USE_CTABLE)
454           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
455           data --;
456 #else
457           data = baij->colmap[idxn[j]/bs]-1;
458 #endif
459           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
460           else {
461             col  = data + idxn[j]%bs;
462             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
463           }
464         }
465       }
466     } else {
467       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
468     }
469   }
470  PetscFunctionReturn(0);
471 }
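/*
   Note that, unlike MatSetValues(), the routine above cannot fetch
   off-process entries; rows must lie in [rstart,rend). A minimal sketch,
   assuming row and col are locally owned global indices:

     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);
*/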
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatNorm_MPISBAIJ"
475 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
476 {
477   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
478   PetscErrorCode ierr;
479   PetscReal      sum[2],*lnorm2;
480 
481   PetscFunctionBegin;
482   if (baij->size == 1) {
483     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
484   } else {
485     if (type == NORM_FROBENIUS) {
486       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
487       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
488       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
489       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
490       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
491       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
492       *norm = sqrt(sum[0] + 2*sum[1]);
493       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
494     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
495       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
496       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
497       PetscReal    *rsum,*rsum2,vabs;
498       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
499       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
500       MatScalar    *v;
501 
502       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
503       rsum2 = rsum + mat->cmap.N;
504       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
505       /* Amat */
506       v = amat->a; jj = amat->j;
507       for (brow=0; brow<mbs; brow++) {
508         grow = bs*(rstart + brow);
509         nz = amat->i[brow+1] - amat->i[brow];
510         for (bcol=0; bcol<nz; bcol++){
511           gcol = bs*(rstart + *jj); jj++;
512           for (col=0; col<bs; col++){
513             for (row=0; row<bs; row++){
514               vabs = PetscAbsScalar(*v); v++;
515               rsum[gcol+col] += vabs;
516               /* non-diagonal block */
517               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
518             }
519           }
520         }
521       }
522       /* Bmat */
523       v = bmat->a; jj = bmat->j;
524       for (brow=0; brow<mbs; brow++) {
525         grow = bs*(rstart + brow);
526         nz = bmat->i[brow+1] - bmat->i[brow];
527         for (bcol=0; bcol<nz; bcol++){
528           gcol = bs*garray[*jj]; jj++;
529           for (col=0; col<bs; col++){
530             for (row=0; row<bs; row++){
531               vabs = PetscAbsScalar(*v); v++;
532               rsum[gcol+col] += vabs;
533               rsum[grow+row] += vabs;
534             }
535           }
536         }
537       }
538       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
539       *norm = 0.0;
540       for (col=0; col<mat->cmap.N; col++) {
541         if (rsum2[col] > *norm) *norm = rsum2[col];
542       }
543       ierr = PetscFree(rsum);CHKERRQ(ierr);
544     } else {
545       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
546     }
547   }
548   PetscFunctionReturn(0);
549 }
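/*
   The Frobenius reduction above relies on the symmetric storage: each
   process holds its diagonal block A_p once and its off-diagonal block B_p
   once, but B_p appears twice in the full matrix (as B_p and B_p^T), so

       ||M||_F = sqrt( sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 ) )

   which is what the MPI_Allreduce of the two local squares followed by
   sqrt(sum[0] + 2*sum[1]) computes.
*/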
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
553 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
554 {
555   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
556   PetscErrorCode ierr;
557   PetscInt       nstash,reallocs;
558   InsertMode     addv;
559 
560   PetscFunctionBegin;
561   if (baij->donotstash) {
562     PetscFunctionReturn(0);
563   }
564 
565   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
566   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
567   if (addv == (ADD_VALUES|INSERT_VALUES)) {
568     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
569   }
570   mat->insertmode = addv; /* in case this processor had no cache */
571 
572   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
573   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
574   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
575   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
576   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
577   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
583 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
584 {
585   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
586   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
589   PetscInt       *row,*col,other_disassembled;
590   PetscMPIInt    n;
591   PetscTruth     r1,r2,r3;
592   MatScalar      *val;
593   InsertMode     addv = mat->insertmode;
594 
595   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
596   PetscFunctionBegin;
597 
598   if (!baij->donotstash) {
599     while (1) {
600       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
601       if (!flg) break;
602 
603       for (i=0; i<n;) {
604         /* Now identify the consecutive vals belonging to the same row */
605         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
606         if (j < n) ncols = j-i;
607         else       ncols = n-i;
608         /* Now assemble all these values with a single function call */
609         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
610         i = j;
611       }
612     }
613     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
614     /* Now process the block-stash. Since the stashed values are column-oriented,
615        clear the roworiented flags before calling MatSetValues(), and restore
616        the original flags afterwards */
617     r1 = baij->roworiented;
618     r2 = a->roworiented;
619     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
620     baij->roworiented = PETSC_FALSE;
621     a->roworiented    = PETSC_FALSE;
622     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
623     while (1) {
624       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
625       if (!flg) break;
626 
627       for (i=0; i<n;) {
628         /* Now identify the consecutive vals belonging to the same row */
629         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
630         if (j < n) ncols = j-i;
631         else       ncols = n-i;
632         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
633         i = j;
634       }
635     }
636     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
637     baij->roworiented = r1;
638     a->roworiented    = r2;
639     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
640   }
641 
642   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
643   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
644 
645   /* determine if any processor has disassembled; if so we must
646      also disassemble ourselves, in order that we may reassemble. */
647   /*
648      if nonzero structure of submatrix B cannot change then we know that
649      no processor disassembled thus we can skip this stuff
650   */
651   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
652     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
653     if (mat->was_assembled && !other_disassembled) {
654       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
655     }
656   }
657 
658   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
659     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
660   }
661   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
662   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
663   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
664 
665   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
666   baij->rowvalues = 0;
667 
668   PetscFunctionReturn(0);
669 }
670 
671 #undef __FUNCT__
672 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
673 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
674 {
675   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
676   PetscErrorCode    ierr;
677   PetscInt          bs = mat->rmap.bs;
678   PetscMPIInt       size = baij->size,rank = baij->rank;
679   PetscTruth        iascii,isdraw;
680   PetscViewer       sviewer;
681   PetscViewerFormat format;
682 
683   PetscFunctionBegin;
684   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
685   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
686   if (iascii) {
687     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
688     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
689       MatInfo info;
690       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
691       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
692       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
693               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
694               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
695       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
696       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
697       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
698       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
699       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
700       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
701       PetscFunctionReturn(0);
702     } else if (format == PETSC_VIEWER_ASCII_INFO) {
703       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
704       PetscFunctionReturn(0);
705     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
706       PetscFunctionReturn(0);
707     }
708   }
709 
710   if (isdraw) {
711     PetscDraw  draw;
712     PetscTruth isnull;
713     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
714     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
715   }
716 
717   if (size == 1) {
718     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
719     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
720   } else {
721     /* assemble the entire matrix onto first processor. */
722     Mat          A;
723     Mat_SeqSBAIJ *Aloc;
724     Mat_SeqBAIJ  *Bloc;
725     PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
726     MatScalar    *a;
727 
728     /* Should this be the same type as mat? */
729     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
730     if (!rank) {
731       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
732     } else {
733       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
734     }
735     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
736     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
737     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
738 
739     /* copy over the A part */
740     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
741     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
742     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
743 
744     for (i=0; i<mbs; i++) {
745       rvals[0] = bs*(baij->rstartbs + i);
746       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
747       for (j=ai[i]; j<ai[i+1]; j++) {
748         col = (baij->cstartbs+aj[j])*bs;
749         for (k=0; k<bs; k++) {
750           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
751           col++; a += bs;
752         }
753       }
754     }
755     /* copy over the B part */
756     Bloc = (Mat_SeqBAIJ*)baij->B->data;
757     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
758     for (i=0; i<mbs; i++) {
759 
760       rvals[0] = bs*(baij->rstartbs + i);
761       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
762       for (j=ai[i]; j<ai[i+1]; j++) {
763         col = baij->garray[aj[j]]*bs;
764         for (k=0; k<bs; k++) {
765           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
766           col++; a += bs;
767         }
768       }
769     }
770     ierr = PetscFree(rvals);CHKERRQ(ierr);
771     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
772     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
773     /*
774        Everyone has to call to draw the matrix since the graphics waits are
775        synchronized across all processors that share the PetscDraw object
776     */
777     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
778     if (!rank) {
779       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
780       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
781     }
782     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
783     ierr = MatDestroy(A);CHKERRQ(ierr);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatView_MPISBAIJ"
790 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
791 {
792   PetscErrorCode ierr;
793   PetscTruth     iascii,isdraw,issocket,isbinary;
794 
795   PetscFunctionBegin;
796   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
797   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
798   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
800   if (iascii || isdraw || issocket || isbinary) {
801     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
802   } else {
803     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
804   }
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatDestroy_MPISBAIJ"
810 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
811 {
812   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816 #if defined(PETSC_USE_LOG)
817   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
818 #endif
819   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
820   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
821   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
822   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
823 #if defined (PETSC_USE_CTABLE)
824   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
825 #else
826   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
827 #endif
828   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
829   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
830   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
831   if (baij->slvec0) {
832     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
834   }
835   if (baij->slvec1) {
836     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
837     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
838     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
839   }
840   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
841   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
842   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
843   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
844 #if defined(PETSC_USE_MAT_SINGLE)
845   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
846 #endif
847   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
848   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
849   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
850   ierr = PetscFree(baij);CHKERRQ(ierr);
851 
852   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatMult_MPISBAIJ"
861 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
862 {
863   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
864   PetscErrorCode ierr;
865   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
866   PetscScalar    *x,*from,zero=0.0;
867 
868   PetscFunctionBegin;
869   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
870   if (nt != A->cmap.n) {
871     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
872   }
873 
874   /* diagonal part */
875   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
876   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
877 
878   /* subdiagonal part */
879   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
880 
881   /* copy x into the vec slvec0 */
882   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
883   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
884   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
885   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
886 
887   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
888   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
889   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
890 
891   /* superdiagonal part */
892   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
899   PetscFunctionReturn(0);
900 }
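/*
   Sketch of the multiply above. Splitting the symmetric matrix on each
   process as M = [A  B; B^T ...] (A = diagonal block in SBAIJ form, B =
   off-diagonal block in BAIJ form), the local result is

       y_loc = A*x_loc + B*x_ghost + (remote pieces of B^T*x_loc)

   slvec0 packs [x_loc ; B^T*x_loc]; the single forward scatter-add through
   sMvctx delivers ghost values into slvec1b while accumulating the
   transpose contributions into slvec1a, so the final multadd
   B*slvec1b + slvec1a finishes y with one communication round.
*/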
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
904 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
905 {
906   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
907   PetscErrorCode ierr;
908   PetscInt       nt;
909 
910   PetscFunctionBegin;
911   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
912   if (nt != A->cmap.n) {
913     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
914   }
915   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
916   if (nt != A->rmap.n) {
917     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
918   }
919 
920   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
921   /* do diagonal part */
922   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
923   /* do superdiagonal part */
924   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
925   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
926   /* do subdiagonal part */
927   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
928   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
929   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
930 
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
936 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
937 {
938   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
939   PetscErrorCode ierr;
940   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
941   PetscScalar    *x,*from,zero=0.0;
942 
943   PetscFunctionBegin;
948   /* diagonal part */
949   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
950   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
951 
952   /* subdiagonal part */
953   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
954 
955   /* copy x into the vec slvec0 */
956   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
957   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
958   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
959   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
960 
961   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
962   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
963   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
964 
965   /* superdiagonal part */
966   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
967 
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
973 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
974 {
975   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
980   /* do diagonal part */
981   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
982   /* do superdiagonal part */
983   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
984   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
985 
986   /* do subdiagonal part */
987   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
988   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
989   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
990 
991   PetscFunctionReturn(0);
992 }
993 
994 /*
995   This only works correctly for square matrices where the subblock A->A is the
996    diagonal block
997 */
998 #undef __FUNCT__
999 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1000 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1001 {
1002   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1003   PetscErrorCode ierr;
1004 
1005   PetscFunctionBegin;
1006   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1007   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatScale_MPISBAIJ"
1013 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1014 {
1015   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1016   PetscErrorCode ierr;
1017 
1018   PetscFunctionBegin;
1019   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1020   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1026 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1027 {
1028   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1029   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1030   PetscErrorCode ierr;
1031   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1032   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1033   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1034 
1035   PetscFunctionBegin;
1036   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1037   mat->getrowactive = PETSC_TRUE;
1038 
1039   if (!mat->rowvalues && (idx || v)) {
1040     /*
1041         allocate enough space to hold information from the longest row.
1042     */
1043     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1044     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1045     PetscInt     max = 1,mbs = mat->mbs,tmp;
1046     for (i=0; i<mbs; i++) {
1047       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1048       if (max < tmp) { max = tmp; }
1049     }
1050     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1051     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1052   }
1053 
1054   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1055   lrow = row - brstart;  /* local row index */
1056 
1057   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1058   if (!v)   {pvA = 0; pvB = 0;}
1059   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1060   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1061   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1062   nztot = nzA + nzB;
1063 
1064   cmap  = mat->garray;
1065   if (v  || idx) {
1066     if (nztot) {
1067       /* Sort by increasing column numbers, assuming A and B already sorted */
1068       PetscInt imark = -1;
1069       if (v) {
1070         *v = v_p = mat->rowvalues;
1071         for (i=0; i<nzB; i++) {
1072           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1073           else break;
1074         }
1075         imark = i;
1076         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1077         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1078       }
1079       if (idx) {
1080         *idx = idx_p = mat->rowindices;
1081         if (imark > -1) {
1082           for (i=0; i<imark; i++) {
1083             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1084           }
1085         } else {
1086           for (i=0; i<nzB; i++) {
1087             if (cmap[cworkB[i]/bs] < cstart)
1088               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1089             else break;
1090           }
1091           imark = i;
1092         }
1093         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1094         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1095       }
1096     } else {
1097       if (idx) *idx = 0;
1098       if (v)   *v   = 0;
1099     }
1100   }
1101   *nz = nztot;
1102   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1103   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
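/*
   Required calling sequence for the routine above, as a sketch (row is a
   hypothetical locally owned global row; for SBAIJ only the stored
   upper-triangular part of the row is returned):

     PetscInt    nz,*cols;
     PetscScalar *vals;
     ierr = MatGetRowUpperTriangular(mat);CHKERRQ(ierr);
     ierr = MatGetRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     // ... use cols[0..nz-1] and vals[0..nz-1] ...
     ierr = MatRestoreRow(mat,row,&nz,&cols,&vals);CHKERRQ(ierr);
     ierr = MatRestoreRowUpperTriangular(mat);CHKERRQ(ierr);
*/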
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1109 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1110 {
1111   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1112 
1113   PetscFunctionBegin;
1114   if (!baij->getrowactive) {
1115     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1116   }
1117   baij->getrowactive = PETSC_FALSE;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1123 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1124 {
1125   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1126   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1127 
1128   PetscFunctionBegin;
1129   aA->getrow_utriangular = PETSC_TRUE;
1130   PetscFunctionReturn(0);
1131 }
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1134 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1135 {
1136   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1137   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1138 
1139   PetscFunctionBegin;
1140   aA->getrow_utriangular = PETSC_FALSE;
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1146 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1147 {
1148   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1149   PetscErrorCode ierr;
1150 
1151   PetscFunctionBegin;
1152   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1153   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1159 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1160 {
1161   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1162   PetscErrorCode ierr;
1163 
1164   PetscFunctionBegin;
1165   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1166   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1172 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1173 {
1174   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1179   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1185 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1186 {
1187   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1188   Mat            A = a->A,B = a->B;
1189   PetscErrorCode ierr;
1190   PetscReal      isend[5],irecv[5];
1191 
1192   PetscFunctionBegin;
1193   info->block_size     = (PetscReal)matin->rmap.bs;
1194   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1195   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1196   isend[3] = info->memory;  isend[4] = info->mallocs;
1197   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1198   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1199   isend[3] += info->memory;  isend[4] += info->mallocs;
1200   if (flag == MAT_LOCAL) {
1201     info->nz_used      = isend[0];
1202     info->nz_allocated = isend[1];
1203     info->nz_unneeded  = isend[2];
1204     info->memory       = isend[3];
1205     info->mallocs      = isend[4];
1206   } else if (flag == MAT_GLOBAL_MAX) {
1207     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1208     info->nz_used      = irecv[0];
1209     info->nz_allocated = irecv[1];
1210     info->nz_unneeded  = irecv[2];
1211     info->memory       = irecv[3];
1212     info->mallocs      = irecv[4];
1213   } else if (flag == MAT_GLOBAL_SUM) {
1214     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1215     info->nz_used      = irecv[0];
1216     info->nz_allocated = irecv[1];
1217     info->nz_unneeded  = irecv[2];
1218     info->memory       = irecv[3];
1219     info->mallocs      = irecv[4];
1220   } else {
1221     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1222   }
1223   info->rows_global       = (PetscReal)A->rmap.N;
1224   info->columns_global    = (PetscReal)A->cmap.N;
1225   info->rows_local        = (PetscReal)A->rmap.N;
1226   info->columns_local     = (PetscReal)A->cmap.N;
1227   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1228   info->fill_ratio_needed = 0;
1229   info->factor_mallocs    = 0;
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1235 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1236 {
1237   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1238   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   switch (op) {
1243   case MAT_NO_NEW_NONZERO_LOCATIONS:
1244   case MAT_YES_NEW_NONZERO_LOCATIONS:
1245   case MAT_COLUMNS_UNSORTED:
1246   case MAT_COLUMNS_SORTED:
1247   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1248   case MAT_KEEP_ZEROED_ROWS:
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1251     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1252     break;
1253   case MAT_ROW_ORIENTED:
1254     a->roworiented = PETSC_TRUE;
1255     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1256     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1257     break;
1258   case MAT_ROWS_SORTED:
1259   case MAT_ROWS_UNSORTED:
1260   case MAT_YES_NEW_DIAGONALS:
1261     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
1262     break;
1263   case MAT_COLUMN_ORIENTED:
1264     a->roworiented = PETSC_FALSE;
1265     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1266     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1267     break;
1268   case MAT_IGNORE_OFF_PROC_ENTRIES:
1269     a->donotstash = PETSC_TRUE;
1270     break;
1271   case MAT_NO_NEW_DIAGONALS:
1272     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1273   case MAT_USE_HASH_TABLE:
1274     a->ht_flag = PETSC_TRUE;
1275     break;
1276   case MAT_NOT_SYMMETRIC:
1277   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1278   case MAT_HERMITIAN:
1279     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1280   case MAT_SYMMETRIC:
1281   case MAT_STRUCTURALLY_SYMMETRIC:
1282   case MAT_NOT_HERMITIAN:
1283   case MAT_SYMMETRY_ETERNAL:
1284   case MAT_NOT_SYMMETRY_ETERNAL:
1285     break;
1286   case MAT_IGNORE_LOWER_TRIANGULAR:
1287     aA->ignore_ltriangular = PETSC_TRUE;
1288     break;
1289   case MAT_ERROR_LOWER_TRIANGULAR:
1290     aA->ignore_ltriangular = PETSC_FALSE;
1291     break;
1292   case MAT_GETROW_UPPERTRIANGULAR:
1293     aA->getrow_utriangular = PETSC_TRUE;
1294     break;
1295   default:
1296     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1303 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1304 {
1305   PetscErrorCode ierr;
1306   PetscFunctionBegin;
1307   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1313 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1314 {
1315   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1316   Mat            a=baij->A, b=baij->B;
1317   PetscErrorCode ierr;
1318   PetscInt       nv,m,n;
1319   PetscTruth     flg;
1320 
1321   PetscFunctionBegin;
1322   if (ll != rr){
1323     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1324     if (!flg)
1325       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1326   }
1327   if (!ll) PetscFunctionReturn(0);
1328 
1329   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1330   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1331 
1332   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1333   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1334 
1335   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1336 
1337   /* left diagonalscale the off-diagonal part */
1338   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1339 
1340   /* scale the diagonal part */
1341   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1342 
1343   /* right diagonalscale the off-diagonal part */
1344   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1345   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1346   PetscFunctionReturn(0);
1347 }
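/*
   Because symmetry forces ll == rr above, the operation is the congruence
   mat <- D*mat*D. Minimal sketch with a single scaling vector d
   (hypothetical, laid out like the matrix rows):

     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/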
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1351 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1352 {
1353   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1354   PetscErrorCode ierr;
1355 
1356   PetscFunctionBegin;
1357   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1358   PetscFunctionReturn(0);
1359 }
1360 
1361 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatEqual_MPISBAIJ"
1365 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1366 {
1367   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1368   Mat            a,b,c,d;
1369   PetscTruth     flg;
1370   PetscErrorCode ierr;
1371 
1372   PetscFunctionBegin;
1373   a = matA->A; b = matA->B;
1374   c = matB->A; d = matB->B;
1375 
1376   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1377   if (flg) {
1378     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1379   }
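       /* the matrices are equal only if both their diagonal and off-diagonal parts
          match on every process, hence the logical AND across the communicator */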
1380   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatCopy_MPISBAIJ"
1386 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1387 {
1388   PetscErrorCode ierr;
1389   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1390   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1391 
1392   PetscFunctionBegin;
1393   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1394   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
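         /* MatCopy_Basic() walks A with MatGetRow(), which for SBAIJ matrices first
            requires switching on upper-triangular row access */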
1395     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1396     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1397     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1398   } else {
1399     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1400     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1401   }
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1407 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1408 {
1409   PetscErrorCode ierr;
1410 
1411   PetscFunctionBegin;
1412   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1413   PetscFunctionReturn(0);
1414 }
1415 
1416 #include "petscblaslapack.h"
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1419 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1420 {
1421   PetscErrorCode ierr;
1422   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1423   PetscBLASInt   bnz,one=1;
1424   Mat_SeqSBAIJ   *xa,*ya;
1425   Mat_SeqBAIJ    *xb,*yb;
1426 
1427   PetscFunctionBegin;
1428   if (str == SAME_NONZERO_PATTERN) {
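         /* identical nonzero patterns mean the stored value arrays of X and Y line up
            entry for entry, so Y += a*X reduces to one BLAS axpy per block */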
1429     PetscScalar alpha = a;
1430     xa = (Mat_SeqSBAIJ *)xx->A->data;
1431     ya = (Mat_SeqSBAIJ *)yy->A->data;
1432     bnz = (PetscBLASInt)xa->nz;
1433     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1434     xb = (Mat_SeqBAIJ *)xx->B->data;
1435     yb = (Mat_SeqBAIJ *)yy->B->data;
1436     bnz = (PetscBLASInt)xb->nz;
1437     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1438   } else {
1439     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1440     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1441     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1442   }
1443   PetscFunctionReturn(0);
1444 }
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1448 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1449 {
1450   PetscErrorCode ierr;
1451   PetscInt       i;
1452   PetscTruth     flg;
1453 
1454   PetscFunctionBegin;
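       /* only principal (row set == column set) submatrices of a symmetric matrix
          are themselves symmetric, so enforce irow[i] == icol[i] before delegating */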
1455   for (i=0; i<n; i++) {
1456     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1457     if (!flg) {
1458       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1459     }
1460   }
1461   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1462   PetscFunctionReturn(0);
1463 }
1464 
1465 
1466 /* -------------------------------------------------------------------*/
1467 static struct _MatOps MatOps_Values = {
1468        MatSetValues_MPISBAIJ,
1469        MatGetRow_MPISBAIJ,
1470        MatRestoreRow_MPISBAIJ,
1471        MatMult_MPISBAIJ,
1472 /* 4*/ MatMultAdd_MPISBAIJ,
1473        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1474        MatMultAdd_MPISBAIJ,
1475        0,
1476        0,
1477        0,
1478 /*10*/ 0,
1479        0,
1480        0,
1481        MatRelax_MPISBAIJ,
1482        MatTranspose_MPISBAIJ,
1483 /*15*/ MatGetInfo_MPISBAIJ,
1484        MatEqual_MPISBAIJ,
1485        MatGetDiagonal_MPISBAIJ,
1486        MatDiagonalScale_MPISBAIJ,
1487        MatNorm_MPISBAIJ,
1488 /*20*/ MatAssemblyBegin_MPISBAIJ,
1489        MatAssemblyEnd_MPISBAIJ,
1490        0,
1491        MatSetOption_MPISBAIJ,
1492        MatZeroEntries_MPISBAIJ,
1493 /*25*/ 0,
1494        0,
1495        0,
1496        0,
1497        0,
1498 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1499        0,
1500        0,
1501        0,
1502        0,
1503 /*35*/ MatDuplicate_MPISBAIJ,
1504        0,
1505        0,
1506        0,
1507        0,
1508 /*40*/ MatAXPY_MPISBAIJ,
1509        MatGetSubMatrices_MPISBAIJ,
1510        MatIncreaseOverlap_MPISBAIJ,
1511        MatGetValues_MPISBAIJ,
1512        MatCopy_MPISBAIJ,
1513 /*45*/ 0,
1514        MatScale_MPISBAIJ,
1515        0,
1516        0,
1517        0,
1518 /*50*/ 0,
1519        0,
1520        0,
1521        0,
1522        0,
1523 /*55*/ 0,
1524        0,
1525        MatSetUnfactored_MPISBAIJ,
1526        0,
1527        MatSetValuesBlocked_MPISBAIJ,
1528 /*60*/ 0,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*65*/ 0,
1534        0,
1535        0,
1536        0,
1537        0,
1538 /*70*/ MatGetRowMax_MPISBAIJ,
1539        0,
1540        0,
1541        0,
1542        0,
1543 /*75*/ 0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*80*/ 0,
1549        0,
1550        0,
1551        0,
1552        MatLoad_MPISBAIJ,
1553 /*85*/ 0,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*90*/ 0,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*95*/ 0,
1564        0,
1565        0,
1566        0,
1567        0,
1568 /*100*/0,
1569        0,
1570        0,
1571        0,
1572        0,
1573 /*105*/0,
1574        MatRealPart_MPISBAIJ,
1575        MatImaginaryPart_MPISBAIJ,
1576        MatGetRowUpperTriangular_MPISBAIJ,
1577        MatRestoreRowUpperTriangular_MPISBAIJ
1578 };
1579 
1580 
1581 EXTERN_C_BEGIN
1582 #undef __FUNCT__
1583 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1584 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1585 {
1586   PetscFunctionBegin;
1587   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1588   *iscopy = PETSC_FALSE;
1589   PetscFunctionReturn(0);
1590 }
1591 EXTERN_C_END
1592 
1593 EXTERN_C_BEGIN
1594 #undef __FUNCT__
1595 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1596 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1597 {
1598   Mat_MPISBAIJ   *b;
1599   PetscErrorCode ierr;
1600   PetscInt       i,mbs,Mbs;
1601 
1602   PetscFunctionBegin;
1603   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1604     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
1605   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1606 
1607   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1608   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1609   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1610   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1611   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1612 
1613   B->rmap.bs = B->cmap.bs = bs;
1614   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1615   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1616 
1617   if (d_nnz) {
1618     for (i=0; i<B->rmap.n/bs; i++) {
1619       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1620     }
1621   }
1622   if (o_nnz) {
1623     for (i=0; i<B->rmap.n/bs; i++) {
1624       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1625     }
1626   }
1627   B->preallocated = PETSC_TRUE;
1628 
1629   b   = (Mat_MPISBAIJ*)B->data;
1630   mbs = B->rmap.n/bs;
1631   Mbs = B->rmap.N/bs;
1632   if (mbs*bs != B->rmap.n) {
1633     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
1634   }
1635 
1636   B->rmap.bs  = bs;
1637   b->bs2 = bs*bs;
1638   b->mbs = mbs;
1639   b->nbs = mbs;
1640   b->Mbs = Mbs;
1641   b->Nbs = Mbs;
1642 
1643   for (i=0; i<=b->size; i++) {
1644     b->rangebs[i] = B->rmap.range[i]/bs;
1645   }
1646   b->rstartbs = B->rmap.rstart/bs;
1647   b->rendbs   = B->rmap.rend/bs;
1648 
1649   b->cstartbs = B->cmap.rstart/bs;
1650   b->cendbs   = B->cmap.rend/bs;
1651 
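       /* the local diagonal block stores only its upper triangle as SeqSBAIJ, while
          the off-diagonal block has no symmetry of its own and is stored as SeqBAIJ */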
1652   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1653   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1654   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1655   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1656   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1657 
1658   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1659   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1660   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1661   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1662   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1663 
1664   /* build stash for off-processor entries formed during assembly */
1665   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1666 
1667   PetscFunctionReturn(0);
1668 }
1669 EXTERN_C_END
1670 
1671 /*MC
1672    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1673    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1674 
1675    Options Database Keys:
1676 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1677 
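     A typical construction sequence (a minimal sketch; the matrix sizes and the
     preallocation values 5 and 2 here are illustrative only):
     .vb
        Mat A;
        ierr = MatCreate(comm,&A);CHKERRQ(ierr);
        ierr = MatSetSizes(A,m,n,M,N);CHKERRQ(ierr);
        ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
        ierr = MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
     .ve
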
1678   Level: beginner
1679 
1680 .seealso: MatCreateMPISBAIJ()
1681 M*/
1682 
1683 EXTERN_C_BEGIN
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatCreate_MPISBAIJ"
1686 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1687 {
1688   Mat_MPISBAIJ   *b;
1689   PetscErrorCode ierr;
1690   PetscTruth     flg;
1691 
1692   PetscFunctionBegin;
1693 
1694   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1695   B->data = (void*)b;
1696   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1697 
1698   B->ops->destroy    = MatDestroy_MPISBAIJ;
1699   B->ops->view       = MatView_MPISBAIJ;
1700   B->mapping    = 0;
1701   B->factor     = 0;
1702   B->assembled  = PETSC_FALSE;
1703 
1704   B->insertmode = NOT_SET_VALUES;
1705   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1706   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1707 
1708   /* build local table of row and column ownerships */
1709   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1710 
1711   /* build stash for off-processor entries formed during assembly */
1712   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1713   b->donotstash  = PETSC_FALSE;
1714   b->colmap      = PETSC_NULL;
1715   b->garray      = PETSC_NULL;
1716   b->roworiented = PETSC_TRUE;
1717 
1718 #if defined(PETSC_USE_MAT_SINGLE)
1719   /* stuff for MatSetValues_XXX in single precision */
1720   b->setvalueslen     = 0;
1721   b->setvaluescopy    = PETSC_NULL;
1722 #endif
1723 
1724   /* stuff used in block assembly */
1725   b->barray       = 0;
1726 
1727   /* stuff used for matrix vector multiply */
1728   b->lvec         = 0;
1729   b->Mvctx        = 0;
1730   b->slvec0       = 0;
1731   b->slvec0b      = 0;
1732   b->slvec1       = 0;
1733   b->slvec1a      = 0;
1734   b->slvec1b      = 0;
1735   b->sMvctx       = 0;
1736 
1737   /* stuff for MatGetRow() */
1738   b->rowindices   = 0;
1739   b->rowvalues    = 0;
1740   b->getrowactive = PETSC_FALSE;
1741 
1742   /* hash table stuff */
1743   b->ht           = 0;
1744   b->hd           = 0;
1745   b->ht_size      = 0;
1746   b->ht_flag      = PETSC_FALSE;
1747   b->ht_fact      = 0;
1748   b->ht_total_ct  = 0;
1749   b->ht_insert_ct = 0;
1750 
1751   b->in_loc       = 0;
1752   b->v_loc        = 0;
1753   b->n_loc        = 0;
1754   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1755     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1756     if (flg) {
1757       PetscReal fact = 1.39;
1758       ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1759       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1760       if (fact <= 1.0) fact = 1.39;
1761       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1762       ierr = PetscInfo1(0,"Hash table factor used %5.2f\n",fact);CHKERRQ(ierr);
1763     }
1764   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1765 
1766   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1767                                      "MatStoreValues_MPISBAIJ",
1768                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1770                                      "MatRetrieveValues_MPISBAIJ",
1771                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1773                                      "MatGetDiagonalBlock_MPISBAIJ",
1774                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1776                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1777                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1778   B->symmetric                  = PETSC_TRUE;
1779   B->structurally_symmetric     = PETSC_TRUE;
1780   B->symmetric_set              = PETSC_TRUE;
1781   B->structurally_symmetric_set = PETSC_TRUE;
1782   PetscFunctionReturn(0);
1783 }
1784 EXTERN_C_END
1785 
1786 /*MC
1787    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1788 
1789    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1790    and MATMPISBAIJ otherwise.
1791 
1792    Options Database Keys:
1793 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1794 
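     For example, a program that calls MatSetFromOptions() can select this type
     at run time (a sketch; the executable name is hypothetical):
     .vb
        mpiexec -n 4 ./myapp -mat_type sbaij -mat_block_size 2
     .ve
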
1795   Level: beginner
1796 
1797 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
1798 M*/
1799 
1800 EXTERN_C_BEGIN
1801 #undef __FUNCT__
1802 #define __FUNCT__ "MatCreate_SBAIJ"
1803 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1804 {
1805   PetscErrorCode ierr;
1806   PetscMPIInt    size;
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1810   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1811   if (size == 1) {
1812     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1813   } else {
1814     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 EXTERN_C_END
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1822 /*@C
1823    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1824    the user should preallocate the matrix storage by setting the parameters
1825    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1826    performance can be increased by more than a factor of 50.
1827 
1828    Collective on Mat
1829 
1830    Input Parameters:
1831 +  A - the matrix
1832 .  bs   - size of block
1833 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1834            submatrix  (same for all local rows)
1835 .  d_nnz - array containing the number of block nonzeros in the various block rows
1836            in the upper triangular and diagonal part of the diagonal portion of the local
1837            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1838            room for the diagonal entry even if it is zero.
1839 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1840            submatrix (same for all local rows).
1841 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1842            off-diagonal portion of the local submatrix (possibly different for
1843            each block row) or PETSC_NULL.
1844 
1845 
1846    Options Database Keys:
1847 +   -mat_no_unroll - uses code that does not unroll the loops in the
1848                      block calculations (much slower)
1849 -   -mat_block_size - size of the blocks to use
1850 
1851    Notes:
1852 
1853    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1854    then it must be used on all processors that share the object for that argument.
1855 
1856    If the *_nnz parameter is given then the *_nz parameter is ignored.
1857 
1858    Storage Information:
1859    For a square global matrix we define each processor's diagonal portion
1860    to be its local rows and the corresponding columns (a square submatrix);
1861    each processor's off-diagonal portion encompasses the remainder of the
1862    local matrix (a rectangular submatrix).
1863 
1864    The user can specify preallocated storage for the diagonal part of
1865    the local submatrix with either d_nz or d_nnz (not both).  Set
1866    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1867    memory allocation.  Likewise, specify preallocated storage for the
1868    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1869 
1870    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1871    the figure below we depict these three local rows and all columns (0-11).
1872 
1873 .vb
1874            0 1 2 3 4 5 6 7 8 9 10 11
1875           -------------------
1876    row 3  |  o o o d d d o o o o o o
1877    row 4  |  o o o d d d o o o o o o
1878    row 5  |  o o o d d d o o o o o o
1879           -------------------
1880 .ve
1881 
1882    Thus, any entries in the d locations are stored in the d (diagonal)
1883    submatrix, and any entries in the o locations are stored in the
1884    o (off-diagonal) submatrix.  Note that the d submatrix is stored in
1885    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1886 
1887    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1888    plus the diagonal part of the d matrix,
1889    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1890    In general, for PDE problems in which most nonzeros are near the diagonal,
1891    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1892    or you will get TERRIBLE performance; see the users' manual chapter on
1893    matrices.
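
     Example: with bs=1 and the three local rows depicted above, only the upper
     triangular part of each row is stored, so assuming every depicted d and o
     location in that part is nonzero one could preallocate with (a sketch):
     .vb
        PetscInt d_nnz[] = {3,2,1};   /* row 3: cols 3-5; row 4: cols 4-5; row 5: col 5 */
        PetscInt o_nnz[] = {6,6,6};   /* cols 6-11 for each of rows 3, 4 and 5 */
        ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     .ve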
1894 
1895    Level: intermediate
1896 
1897 .keywords: matrix, block, aij, compressed row, sparse, parallel
1898 
1899 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1900 @*/
1901 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1902 {
1903   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1904 
1905   PetscFunctionBegin;
1906   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1907   if (f) {
1908     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1909   }
1910   PetscFunctionReturn(0);
1911 }
1912 
1913 #undef __FUNCT__
1914 #define __FUNCT__ "MatCreateMPISBAIJ"
1915 /*@C
1916    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1917    (block compressed row).  For good matrix assembly performance
1918    the user should preallocate the matrix storage by setting the parameters
1919    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1920    performance can be increased by more than a factor of 50.
1921 
1922    Collective on MPI_Comm
1923 
1924    Input Parameters:
1925 +  comm - MPI communicator
1926 .  bs   - size of block
1927 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1928            This value should be the same as the local size used in creating the
1929            y vector for the matrix-vector product y = Ax.
1930 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1931            This value should be the same as the local size used in creating the
1932            x vector for the matrix-vector product y = Ax.
1933 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1934 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1935 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1936            submatrix  (same for all local rows)
1937 .  d_nnz - array containing the number of block nonzeros in the various block rows
1938            in the upper triangular and diagonal part of the diagonal portion of the local
1939            submatrix (possibly different for each block row) or PETSC_NULL.
1940            You must leave room for the diagonal entry even if it is zero.
1941 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1942            submatrix (same for all local rows).
1943 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1944            off-diagonal portion of the local submatrix (possibly different for
1945            each block row) or PETSC_NULL.
1946 
1947    Output Parameter:
1948 .  A - the matrix
1949 
1950    Options Database Keys:
1951 +   -mat_no_unroll - uses code that does not unroll the loops in the
1952                      block calculations (much slower)
1953 .   -mat_block_size - size of the blocks to use
1954 -   -mat_mpi - use the parallel matrix data structures even on one processor
1955                (defaults to using SeqSBAIJ format on one processor)
1956 
1957    Notes:
1958    The number of rows and columns must be divisible by blocksize.
1959 
1960    The user MUST specify either the local or global matrix dimensions
1961    (possibly both).
1962 
1963    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1964    then it must be used on all processors that share the object for that argument.
1965 
1966    If the *_nnz parameter is given then the *_nz parameter is ignored.
1967 
1968    Storage Information:
1969    For a square global matrix we define each processor's diagonal portion
1970    to be its local rows and the corresponding columns (a square submatrix);
1971    each processor's off-diagonal portion encompasses the remainder of the
1972    local matrix (a rectangular submatrix).
1973 
1974    The user can specify preallocated storage for the diagonal part of
1975    the local submatrix with either d_nz or d_nnz (not both).  Set
1976    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1977    memory allocation.  Likewise, specify preallocated storage for the
1978    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1979 
1980    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1981    the figure below we depict these three local rows and all columns (0-11).
1982 
1983 .vb
1984            0 1 2 3 4 5 6 7 8 9 10 11
1985           -------------------
1986    row 3  |  o o o d d d o o o o o o
1987    row 4  |  o o o d d d o o o o o o
1988    row 5  |  o o o d d d o o o o o o
1989           -------------------
1990 .ve
1991 
1992    Thus, any entries in the d locations are stored in the d (diagonal)
1993    submatrix, and any entries in the o locations are stored in the
1994    o (off-diagonal) submatrix.  Note that the d submatrix is stored in
1995    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1996 
1997    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1998    plus the diagonal part of the d matrix,
1999    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2000    In general, for PDE problems in which most nonzeros are near the diagonal,
2001    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2002    or you will get TERRIBLE performance; see the users' manual chapter on
2003    matrices.
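
     Example (a minimal sketch; the block size 2 and the per-block-row estimates
     5 and 2 are illustrative, and M must be divisible by the block size):
     .vb
        Mat A;
        ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     .ve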
2004 
2005    Level: intermediate
2006 
2007 .keywords: matrix, block, aij, compressed row, sparse, parallel
2008 
2009 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2010 @*/
2011 
2012 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2013 {
2014   PetscErrorCode ierr;
2015   PetscMPIInt    size;
2016 
2017   PetscFunctionBegin;
2018   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2019   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2020   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2021   if (size > 1) {
2022     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2023     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2024   } else {
2025     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2026     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2027   }
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 
2032 #undef __FUNCT__
2033 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2034 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2035 {
2036   Mat            mat;
2037   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2038   PetscErrorCode ierr;
2039   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2040   PetscScalar    *array;
2041 
2042   PetscFunctionBegin;
2043   *newmat       = 0;
2044   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2045   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2046   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2047   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2048   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2049   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2050 
2051   mat->factor       = matin->factor;
2052   mat->preallocated = PETSC_TRUE;
2053   mat->assembled    = PETSC_TRUE;
2054   mat->insertmode   = NOT_SET_VALUES;
2055 
2056   a = (Mat_MPISBAIJ*)mat->data;
2057   a->bs2   = oldmat->bs2;
2058   a->mbs   = oldmat->mbs;
2059   a->nbs   = oldmat->nbs;
2060   a->Mbs   = oldmat->Mbs;
2061   a->Nbs   = oldmat->Nbs;
2062 
2063 
2064   a->size         = oldmat->size;
2065   a->rank         = oldmat->rank;
2066   a->donotstash   = oldmat->donotstash;
2067   a->roworiented  = oldmat->roworiented;
2068   a->rowindices   = 0;
2069   a->rowvalues    = 0;
2070   a->getrowactive = PETSC_FALSE;
2071   a->barray       = 0;
2072   a->rstartbs    = oldmat->rstartbs;
2073   a->rendbs      = oldmat->rendbs;
2074   a->cstartbs    = oldmat->cstartbs;
2075   a->cendbs      = oldmat->cendbs;
2076 
2077   /* hash table stuff */
2078   a->ht           = 0;
2079   a->hd           = 0;
2080   a->ht_size      = 0;
2081   a->ht_flag      = oldmat->ht_flag;
2082   a->ht_fact      = oldmat->ht_fact;
2083   a->ht_total_ct  = 0;
2084   a->ht_insert_ct = 0;
2085 
2086   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2087   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2088   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2089   if (oldmat->colmap) {
2090 #if defined (PETSC_USE_CTABLE)
2091     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2092 #else
2093     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2094     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2095     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2096 #endif
2097   } else a->colmap = 0;
2098 
2099   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2100     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2101     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2102     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2103   } else a->garray = 0;
2104 
2105   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2106   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2107   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2108   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2109 
2110   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2111   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2112   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2113   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2114 
2115   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2116   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2117   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2118   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2119   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2120   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2121   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2122   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2123   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2124   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2125   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2126   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2127   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2128 
2129   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2130   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2131   a->sMvctx = oldmat->sMvctx;
2132   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2133 
2134   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2136   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2137   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2138   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr); /* copy the composed-function list from the original matrix to the duplicate */
2139   *newmat = mat;
2140   PetscFunctionReturn(0);
2141 }
2142 
2143 #include "petscsys.h"
2144 
2145 #undef __FUNCT__
2146 #define __FUNCT__ "MatLoad_MPISBAIJ"
2147 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2148 {
2149   Mat            A;
2150   PetscErrorCode ierr;
2151   PetscInt       i,nz,j,rstart,rend;
2152   PetscScalar    *vals,*buf;
2153   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2154   MPI_Status     status;
2155   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2156   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2157   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2158   PetscInt       bs=1,Mbs,mbs,extra_rows;
2159   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2160   PetscInt       dcount,kmax,k,nzcount,tmp;
2161   int            fd;
2162 
2163   PetscFunctionBegin;
2164   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2165     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2166   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2167 
2168   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2169   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2170   if (!rank) {
2171     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2172     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2173     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2174     if (header[3] < 0) {
2175       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2176     }
2177   }
2178 
2179   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2180   M = header[1]; N = header[2];
2181 
2182   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2183 
2184   /*
2185      This code adds extra rows to make sure the number of rows is
2186      divisible by the blocksize
2187   */
2188   Mbs        = M/bs;
2189   extra_rows = bs - M + bs*(Mbs);
2190   if (extra_rows == bs) extra_rows = 0;
2191   else                  Mbs++;
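       /* e.g. M=10, bs=4: Mbs=10/4=2 and extra_rows=4-10+4*2=2, which differs from
          bs, so Mbs becomes 3 and two padding rows (with 1.0 on the diagonal) bring
          the padded size to 12 */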
2192   if (extra_rows && !rank) {
2193     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2194   }
2195 
2196   /* determine ownership of all rows */
2197   mbs        = Mbs/size + ((Mbs % size) > rank);
2198   m          = mbs*bs;
2199   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2200   browners   = rowners + size + 1;
2201   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2202   rowners[0] = 0;
2203   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2204   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2205   rstart = rowners[rank];
2206   rend   = rowners[rank+1];
2207 
2208   /* distribute row lengths to all processors */
2209   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2210   if (!rank) {
2211     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2212     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2213     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2214     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2215     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2216     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2217     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2218   } else {
2219     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2220   }
2221 
2222   if (!rank) {   /* procs[0] */
2223     /* calculate the number of nonzeros on each processor */
2224     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2225     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2226     for (i=0; i<size; i++) {
2227       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2228         procsnz[i] += rowlengths[j];
2229       }
2230     }
2231     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2232 
2233     /* determine max buffer needed and allocate it */
2234     maxnz = 0;
2235     for (i=0; i<size; i++) {
2236       maxnz = PetscMax(maxnz,procsnz[i]);
2237     }
2238     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2239 
2240     /* read in my part of the matrix column indices  */
2241     nz     = procsnz[0];
2242     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2243     mycols = ibuf;
2244     if (size == 1)  nz -= extra_rows;
2245     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2246     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2247 
2248     /* read in every other proc's part (except the last proc's) and ship it off */
2249     for (i=1; i<size-1; i++) {
2250       nz   = procsnz[i];
2251       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2252       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2253     }
2254     /* read in the stuff for the last proc */
2255     if (size != 1) {
2256       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2257       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2258       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2259       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2260     }
2261     ierr = PetscFree(cols);CHKERRQ(ierr);
2262   } else {  /* procs[i], i>0 */
2263     /* determine buffer space needed for message */
2264     nz = 0;
2265     for (i=0; i<m; i++) {
2266       nz += locrowlens[i];
2267     }
2268     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2269     mycols = ibuf;
2270     /* receive message of column indices*/
2271     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2272     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2273     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2274   }
2275 
2276   /* loop over local rows, determining number of off diagonal entries */
2277   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2278   odlens   = dlens + (rend-rstart);
2279   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2280   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2281   masked1  = mask    + Mbs;
2282   masked2  = masked1 + Mbs;
2283   rowcount = 0; nzcount = 0;
2284   for (i=0; i<mbs; i++) {
2285     dcount  = 0;
2286     odcount = 0;
2287     for (j=0; j<bs; j++) {
2288       kmax = locrowlens[rowcount];
2289       for (k=0; k<kmax; k++) {
2290         tmp = mycols[nzcount++]/bs; /* block col. index */
2291         if (!mask[tmp]) {
2292           mask[tmp] = 1;
2293           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2294           else masked1[dcount++] = tmp; /* entry in diag portion */
2295         }
2296       }
2297       rowcount++;
2298     }
2299 
2300     dlens[i]  = dcount;  /* d_nzz[i] */
2301     odlens[i] = odcount; /* o_nzz[i] */
2302 
2303     /* zero out the mask elements we set */
2304     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2305     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2306   }
2307 
2308   /* create our matrix */
2309   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2310   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2311   ierr = MatSetType(A,type);CHKERRQ(ierr);
2312   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2313   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2314 
2315   if (!rank) {
2316     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2317     /* read in my part of the matrix numerical values  */
2318     nz = procsnz[0];
2319     vals = buf;
2320     mycols = ibuf;
2321     if (size == 1)  nz -= extra_rows;
2322     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2323     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2324 
2325     /* insert into matrix */
2326     jj      = rstart*bs;
2327     for (i=0; i<m; i++) {
2328       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2329       mycols += locrowlens[i];
2330       vals   += locrowlens[i];
2331       jj++;
2332     }
2333 
2334     /* read in other processors (except the last one) and ship out */
2335     for (i=1; i<size-1; i++) {
2336       nz   = procsnz[i];
2337       vals = buf;
2338       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2339       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2340     }
2341     /* the last proc */
2342     if (size != 1){
2343       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2344       vals = buf;
2345       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2346       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2347       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2348     }
2349     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2350 
2351   } else {
2352     /* receive numeric values */
2353     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2354 
2355     /* receive message of values*/
2356     vals   = buf;
2357     mycols = ibuf;
2358     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2359     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2360     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2361 
2362     /* insert into matrix */
2363     jj      = rstart*bs;
2364     for (i=0; i<m; i++) {
2365       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2366       mycols += locrowlens[i];
2367       vals   += locrowlens[i];
2368       jj++;
2369     }
2370   }
2371 
2372   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2373   ierr = PetscFree(buf);CHKERRQ(ierr);
2374   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2375   ierr = PetscFree(rowners);CHKERRQ(ierr);
2376   ierr = PetscFree(dlens);CHKERRQ(ierr);
2377   ierr = PetscFree(mask);CHKERRQ(ierr);
2378   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2379   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2380   *newmat = A;
2381   PetscFunctionReturn(0);
2382 }
2383 
2384 #undef __FUNCT__
2385 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2386 /*XXXXX@
2387    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2388 
2389    Input Parameters:
2390 .  mat  - the matrix
2391 .  fact - factor
2392 
2393    Collective on Mat
2394 
2395    Level: advanced
2396 
2397   Notes:
2398    This can also be set by the command line option: -mat_use_hash_table fact
2399 
2400 .keywords: matrix, hashtable, factor, HT
2401 
2402 .seealso: MatSetOption()
2403 @XXXXX*/
2404 
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2408 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2409 {
2410   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2411   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2412   PetscReal      atmp;
2413   PetscReal      *work,*svalues,*rvalues;
2414   PetscErrorCode ierr;
2415   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2416   PetscMPIInt    rank,size;
2417   PetscInt       *rowners_bs,dest,count,source;
2418   PetscScalar    *va;
2419   MatScalar      *ba;
2420   MPI_Status     stat;
2421 
2422   PetscFunctionBegin;
2423   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2424   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2425 
2426   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2427   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2428 
2429   bs   = A->rmap.bs;
2430   mbs  = a->mbs;
2431   Mbs  = a->Mbs;
2432   ba   = b->a;
2433   bi   = b->i;
2434   bj   = b->j;
2435 
2436   /* find ownerships */
2437   rowners_bs = A->rmap.range;
2438 
2439   /* each proc creates an array to be distributed */
2440   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2441   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
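       /* because only the upper triangle is stored, each entry of B also contributes
          to the row maximum of the mirrored row owned by a higher-rank process;
          those contributions are accumulated in work[] and shipped to their owners */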
2442 
2443   /* row_max for B */
2444   if (rank != size-1){
2445     for (i=0; i<mbs; i++) {
2446       ncols = bi[1] - bi[0]; bi++;
2447       brow  = bs*i;
2448       for (j=0; j<ncols; j++){
2449         bcol = bs*(*bj);
2450         for (kcol=0; kcol<bs; kcol++){
2451           col = bcol + kcol;                 /* local col index */
2452           col += rowners_bs[rank+1];      /* global col index */
2453           for (krow=0; krow<bs; krow++){
2454             atmp = PetscAbsScalar(*ba); ba++;
2455             row = brow + krow;    /* local row index */
2456             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2457             if (work[col] < atmp) work[col] = atmp;
2458           }
2459         }
2460         bj++;
2461       }
2462     }
2463 
2464     /* send values to its owners */
2465     for (dest=rank+1; dest<size; dest++){
2466       svalues = work + rowners_bs[dest];
2467       count   = rowners_bs[dest+1]-rowners_bs[dest];
2468       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2469     }
2470   }
2471 
2472   /* receive values */
2473   if (rank){
2474     rvalues = work;
2475     count   = rowners_bs[rank+1]-rowners_bs[rank];
2476     for (source=0; source<rank; source++){
2477       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2478       /* process values */
2479       for (i=0; i<count; i++){
2480         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2481       }
2482     }
2483   }
2484 
2485   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2486   ierr = PetscFree(work);CHKERRQ(ierr);
2487   PetscFunctionReturn(0);
2488 }
2489 
2490 #undef __FUNCT__
2491 #define __FUNCT__ "MatRelax_MPISBAIJ"
2492 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2493 {
2494   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2495   PetscErrorCode ierr;
2496   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2497   PetscScalar    *x,*b,*ptr,zero=0.0;
2498   Vec            bb1;
2499 
2500   PetscFunctionBegin;
2501   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2502   if (bs > 1)
2503     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2504 
2505   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2506     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2507       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2508       its--;
2509     }
2510 
2511     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
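         /* each sweep assembles bb1 = bb - (off-diagonal part of the matrix, i.e. B
            and its mirrored transpose) applied to xx: slvec0 = -[xx; B^T*xx] is
            scatter-added into slvec1 = [bb; 0], and the multadd below folds in B
            times the gathered ghost values of xx */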
2512     while (its--){
2513 
2514       /* lower triangular part: slvec0b = - B^T*xx */
2515       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2516 
2517       /* copy xx into slvec0a */
2518       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2519       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2520       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2521       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2522 
2523       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2524 
2525       /* copy bb into slvec1a */
2526       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2527       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2528       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2529       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2530 
2531       /* set slvec1b = 0 */
2532       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2533 
2534       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2535       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2536       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2537       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2538 
2539       /* upper triangular part: bb1 = bb1 - B*x */
2540       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2541 
2542       /* local diagonal sweep */
2543       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2544     }
2545     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2546   } else {
2547     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2548   }
2549   PetscFunctionReturn(0);
2550 }
2551 
2552 #undef __FUNCT__
2553 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2554 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2555 {
2556   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2557   PetscErrorCode ierr;
2558   Vec            lvec1,bb1;
2559 
2560   PetscFunctionBegin;
2561   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2562   if (matin->rmap.bs > 1)
2563     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2564 
2565   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2566     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2567       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2568       its--;
2569     }
2570 
2571     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2572     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2573     while (its--){
2574       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2575 
2576       /* lower triangular part: bb1 = bb - B^T*xx */
2577       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2578       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2579 
2580       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2581       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2582       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2583 
2584       /* upper triangular part: bb1 = bb1 - B*x */
2585       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2586       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2587 
2588       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2589 
2590       /* diagonal sweep */
2591       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2592     }
2593     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2594     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2595   } else {
2596     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2597   }
2598   PetscFunctionReturn(0);
2599 }
2600 
2601