xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 290bbb0a1dcfb34dbf94efcfcc44171581b0efea)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements already in single precision and
26    inserts them directly into the single precision data structure. The function
27    MatSetValuesBlocked_MPISBAIJ() converts the entries to single precision and then calls
28    ..._MatScalar() to put them into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew,MatScalar); \
103       N = nrow++ - 1;  \
104       /* shift up all the later entries in this row */ \
105       for (ii=N; ii>=_i; ii--) { \
106         rp[ii+1] = rp[ii]; \
107         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
108       } \
109       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
110       rp[_i]                      = bcol;  \
111       ap[bs2*_i + bs*cidx + ridx] = value;  \
112       a_noinsert:; \
113     ailen[brow] = nrow; \
114 }
115 #ifndef MatSetValues_SeqBAIJ_B_Private
116 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
117 { \
118     brow = row/bs;  \
119     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
120     rmax = bimax[brow]; nrow = bilen[brow]; \
121       bcol = col/bs; \
122       ridx = row % bs; cidx = col % bs; \
123       low = 0; high = nrow; \
124       while (high-low > 3) { \
125         t = (low+high)/2; \
126         if (rp[t] > bcol) high = t; \
127         else              low  = t; \
128       } \
129       for (_i=low; _i<high; _i++) { \
130         if (rp[_i] > bcol) break; \
131         if (rp[_i] == bcol) { \
132           bap  = ap +  bs2*_i + bs*cidx + ridx; \
133           if (addv == ADD_VALUES) *bap += value;  \
134           else                    *bap  = value;  \
135           goto b_noinsert; \
136         } \
137       } \
138       if (b->nonew == 1) goto b_noinsert; \
139       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
140       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew,MatScalar); \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       b_noinsert:; \
151     bilen[brow] = nrow; \
152 }
153 #endif
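/*
   Worked example (illustration only, not referenced by the code): the point-to-block
   index arithmetic used by the two macros above.  With block size bs = 2 (bs2 = 4),
   the point entry (row,col) = (5,8) lands in block (brow,bcol) = (row/bs,col/bs) = (2,4)
   at intra-block offset (ridx,cidx) = (row%bs,col%bs) = (1,0).  Since each bs x bs block
   is stored column by column, the value is written at ap[bs2*_i + bs*cidx + ridx], where
   _i is the position of block column bcol within the entries of block row brow.
*/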
154 
155 #if defined(PETSC_USE_MAT_SINGLE)
156 #undef __FUNCT__
157 #define __FUNCT__ "MatSetValues_MPISBAIJ"
158 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
159 {
160   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
161   PetscErrorCode ierr;
162   PetscInt       i,N = m*n;
163   MatScalar      *vsingle;
164 
165   PetscFunctionBegin;
166   if (N > b->setvalueslen) {
167     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
168     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
169     b->setvalueslen  = N;
170   }
171   vsingle = b->setvaluescopy;
172 
173   for (i=0; i<N; i++) {
174     vsingle[i] = v[i];
175   }
176   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
177   PetscFunctionReturn(0);
178 }
179 
180 #undef __FUNCT__
181 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
182 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
183 {
184   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
185   PetscErrorCode ierr;
186   PetscInt       i,N = m*n*b->bs2;
187   MatScalar      *vsingle;
188 
189   PetscFunctionBegin;
190   if (N > b->setvalueslen) {
191     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
192     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
193     b->setvalueslen  = N;
194   }
195   vsingle = b->setvaluescopy;
196   for (i=0; i<N; i++) {
197     vsingle[i] = v[i];
198   }
199   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
200   PetscFunctionReturn(0);
201 }
202 #endif
203 
204 /* Only add/insert a(i,j) with i<=j (blocks).
205    Any a(i,j) with i>j input by the user is ignored.
206 */
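/*
   Caller-side sketch (hypothetical code, added for illustration): inserting only the
   upper triangle of a symmetric matrix into an MPISBAIJ matrix.  The matrix `mat`, the
   global size `N`, and the value `val` below are placeholders; lower triangular entries
   (i > j) passed in are either ignored or flagged as an error depending on the
   MAT_IGNORE_LOWER_TRIANGULAR option.

     PetscInt    i,j,rstart,rend;
     PetscScalar val = 1.0;
     ierr = MatGetOwnershipRange(mat,&rstart,&rend);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=i; j<N; j++) {                 (only j >= i is inserted)
         ierr = MatSetValues(mat,1,&i,1,&j,&val,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/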
207 #undef __FUNCT__
208 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
209 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
210 {
211   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
212   MatScalar      value;
213   PetscTruth     roworiented = baij->roworiented;
214   PetscErrorCode ierr;
215   PetscInt       i,j,row,col;
216   PetscInt       rstart_orig=mat->rmap.rstart;
217   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
218   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
219 
220   /* Some Variables required in the macro */
221   Mat            A = baij->A;
222   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
223   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
224   MatScalar      *aa=a->a;
225 
226   Mat            B = baij->B;
227   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
228   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
229   MatScalar     *ba=b->a;
230 
231   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
232   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
233   MatScalar     *ap,*bap;
234 
235   /* for stash */
236   PetscInt      n_loc, *in_loc = PETSC_NULL;
237   MatScalar     *v_loc = PETSC_NULL;
238 
239   PetscFunctionBegin;
240 
241   if (!baij->donotstash){
242     if (n > baij->n_loc) {
243       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
244       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
245       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
246       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
247       baij->n_loc = n;
248     }
249     in_loc = baij->in_loc;
250     v_loc  = baij->v_loc;
251   }
252 
253   for (i=0; i<m; i++) {
254     if (im[i] < 0) continue;
255 #if defined(PETSC_USE_DEBUG)
256     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
257 #endif
258     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
259       row = im[i] - rstart_orig;              /* local row index */
260       for (j=0; j<n; j++) {
261         if (im[i]/bs > in[j]/bs){
262           if (a->ignore_ltriangular){
263             continue;    /* ignore lower triangular blocks */
264           } else {
265             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
266           }
267         }
268         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
269           col = in[j] - cstart_orig;          /* local col index */
270           brow = row/bs; bcol = col/bs;
271           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
272           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
273           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
274           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
275         } else if (in[j] < 0) continue;
276 #if defined(PETSC_USE_DEBUG)
277         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
278 #endif
279         else {  /* off-diag entry (B) */
280           if (mat->was_assembled) {
281             if (!baij->colmap) {
282               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
283             }
284 #if defined (PETSC_USE_CTABLE)
285             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
286             col  = col - 1;
287 #else
288             col = baij->colmap[in[j]/bs] - 1;
289 #endif
290             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
291               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
292               col =  in[j];
293               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
294               B = baij->B;
295               b = (Mat_SeqBAIJ*)(B)->data;
296               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
297               ba=b->a;
298             } else col += in[j]%bs;
299           } else col = in[j];
300           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
301           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
302           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
303         }
304       }
305     } else {  /* off processor entry */
306       if (!baij->donotstash) {
307         n_loc = 0;
308         for (j=0; j<n; j++){
309           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
310           in_loc[n_loc] = in[j];
311           if (roworiented) {
312             v_loc[n_loc] = v[i*n+j];
313           } else {
314             v_loc[n_loc] = v[j*m+i];
315           }
316           n_loc++;
317         }
318         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
319       }
320     }
321   }
322   PetscFunctionReturn(0);
323 }
324 
325 #undef __FUNCT__
326 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
327 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
328 {
329   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
330   const MatScalar *value;
331   MatScalar       *barray=baij->barray;
332   PetscTruth      roworiented = baij->roworiented;
333   PetscErrorCode  ierr;
334   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
335   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
336   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
337 
338   PetscFunctionBegin;
339   if(!barray) {
340     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
341     baij->barray = barray;
342   }
343 
344   if (roworiented) {
345     stepval = (n-1)*bs;
346   } else {
347     stepval = (m-1)*bs;
348   }
349   for (i=0; i<m; i++) {
350     if (im[i] < 0) continue;
351 #if defined(PETSC_USE_DEBUG)
352     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
353 #endif
354     if (im[i] >= rstart && im[i] < rend) {
355       row = im[i] - rstart;
356       for (j=0; j<n; j++) {
357         /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
358         if ((roworiented) && (n == 1)) {
359           barray = (MatScalar*) v + i*bs2;
360         } else if((!roworiented) && (m == 1)) {
361           barray = (MatScalar*) v + j*bs2;
362         } else { /* Here a copy is required */
363           if (roworiented) {
364             value = v + i*(stepval+bs)*bs + j*bs;
365           } else {
366             value = v + j*(stepval+bs)*bs + i*bs;
367           }
368           for (ii=0; ii<bs; ii++,value+=stepval) {
369             for (jj=0; jj<bs; jj++) {
370               *barray++  = *value++;
371             }
372           }
373           barray -=bs2;
374         }
375 
376         if (in[j] >= cstart && in[j] < cend){
377           col  = in[j] - cstart;
378           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
379         }
380         else if (in[j] < 0) continue;
381 #if defined(PETSC_USE_DEBUG)
382         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
383 #endif
384         else {
385           if (mat->was_assembled) {
386             if (!baij->colmap) {
387               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
388             }
389 
390 #if defined(PETSC_USE_DEBUG)
391 #if defined (PETSC_USE_CTABLE)
392             { PetscInt data;
393               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
394               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
395             }
396 #else
397             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
398 #endif
399 #endif
400 #if defined (PETSC_USE_CTABLE)
401 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
402             col  = (col - 1)/bs;
403 #else
404             col = (baij->colmap[in[j]] - 1)/bs;
405 #endif
406             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
407               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
408               col =  in[j];
409             }
410           }
411           else col = in[j];
412           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
413         }
414       }
415     } else {
416       if (!baij->donotstash) {
417         if (roworiented) {
418           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
419         } else {
420           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
421         }
422       }
423     }
424   }
425   PetscFunctionReturn(0);
426 }
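/*
   Usage sketch (hypothetical, for illustration): inserting a single bs x bs block with
   MatSetValuesBlocked().  The indices are block indices, and under the default
   MAT_ROW_ORIENTED option the values in the user array are read row by row with a row
   stride of n*bs (= stepval+bs in the copy loop above).  The names `mat`, `ib`, `jb`
   and `blk` are placeholders.

     PetscInt    ib = 0, jb = 1;                   (block row and block column)
     PetscScalar blk[4] = {1.0,2.0,3.0,4.0};       (one 2x2 block, read row by row)
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,blk,INSERT_VALUES);CHKERRQ(ierr);
*/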
427 
428 #undef __FUNCT__
429 #define __FUNCT__ "MatGetValues_MPISBAIJ"
430 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
431 {
432   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
433   PetscErrorCode ierr;
434   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
435   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
436 
437   PetscFunctionBegin;
438   for (i=0; i<m; i++) {
439     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
440     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
441     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
442       row = idxm[i] - bsrstart;
443       for (j=0; j<n; j++) {
444         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
445         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
446         if (idxn[j] >= bscstart && idxn[j] < bscend){
447           col = idxn[j] - bscstart;
448           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
449         } else {
450           if (!baij->colmap) {
451             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
452           }
453 #if defined (PETSC_USE_CTABLE)
454           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
455           data --;
456 #else
457           data = baij->colmap[idxn[j]/bs]-1;
458 #endif
459           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
460           else {
461             col  = data + idxn[j]%bs;
462             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
463           }
464         }
465       }
466     } else {
467       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
468     }
469   }
470  PetscFunctionReturn(0);
471 }
472 
473 #undef __FUNCT__
474 #define __FUNCT__ "MatNorm_MPISBAIJ"
475 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
476 {
477   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
478   PetscErrorCode ierr;
479   PetscReal      sum[2],*lnorm2;
480 
481   PetscFunctionBegin;
482   if (baij->size == 1) {
483     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
484   } else {
485     if (type == NORM_FROBENIUS) {
486       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
487       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
488       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
489       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
490       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
491       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
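      /* ||M||_F = sqrt(||A||_F^2 + 2*||B||_F^2): each stored entry of the off-diagonal
         block B represents both M(i,j) and M(j,i) of the symmetric matrix, hence the
         factor of 2 on sum[1]; norm(A) from the symmetric SeqSBAIJ part is assumed to
         already include both halves of its own off-diagonal entries */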
492       *norm = sqrt(sum[0] + 2*sum[1]);
493       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
494     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
495       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
496       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
497       PetscReal    *rsum,*rsum2,vabs;
498       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
499       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
500       MatScalar    *v;
501 
502       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
503       rsum2 = rsum + mat->cmap.N;
504       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
505       /* Amat */
506       v = amat->a; jj = amat->j;
507       for (brow=0; brow<mbs; brow++) {
508         grow = bs*(rstart + brow);
509         nz = amat->i[brow+1] - amat->i[brow];
510         for (bcol=0; bcol<nz; bcol++){
511           gcol = bs*(rstart + *jj); jj++;
512           for (col=0; col<bs; col++){
513             for (row=0; row<bs; row++){
514               vabs = PetscAbsScalar(*v); v++;
515               rsum[gcol+col] += vabs;
516               /* non-diagonal block */
517               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
518             }
519           }
520         }
521       }
522       /* Bmat */
523       v = bmat->a; jj = bmat->j;
524       for (brow=0; brow<mbs; brow++) {
525         grow = bs*(rstart + brow);
526         nz = bmat->i[brow+1] - bmat->i[brow];
527         for (bcol=0; bcol<nz; bcol++){
528           gcol = bs*garray[*jj]; jj++;
529           for (col=0; col<bs; col++){
530             for (row=0; row<bs; row++){
531               vabs = PetscAbsScalar(*v); v++;
532               rsum[gcol+col] += vabs;
533               rsum[grow+row] += vabs;
534             }
535           }
536         }
537       }
538       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
539       *norm = 0.0;
540       for (col=0; col<mat->cmap.N; col++) {
541         if (rsum2[col] > *norm) *norm = rsum2[col];
542       }
543       ierr = PetscFree(rsum);CHKERRQ(ierr);
544     } else {
545       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
546     }
547   }
548   PetscFunctionReturn(0);
549 }
550 
551 #undef __FUNCT__
552 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
553 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
554 {
555   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
556   PetscErrorCode ierr;
557   PetscInt       nstash,reallocs;
558   InsertMode     addv;
559 
560   PetscFunctionBegin;
561   if (baij->donotstash) {
562     PetscFunctionReturn(0);
563   }
564 
565   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
566   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
567   if (addv == (ADD_VALUES|INSERT_VALUES)) {
568     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
569   }
570   mat->insertmode = addv; /* in case this processor had no cache */
571 
572   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
573   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
574   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
575   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
576   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
577   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
578   PetscFunctionReturn(0);
579 }
580 
581 #undef __FUNCT__
582 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
583 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
584 {
585   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
586   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
589   PetscInt       *row,*col,other_disassembled;
590   PetscMPIInt    n;
591   PetscTruth     r1,r2,r3;
592   MatScalar      *val;
593   InsertMode     addv = mat->insertmode;
594 
595   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
596   PetscFunctionBegin;
597 
598   if (!baij->donotstash) {
599     while (1) {
600       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
601       if (!flg) break;
602 
603       for (i=0; i<n;) {
604         /* Now identify the consecutive vals belonging to the same row */
605         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
606         if (j < n) ncols = j-i;
607         else       ncols = n-i;
608         /* Now assemble all these values with a single function call */
609         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
610         i = j;
611       }
612     }
613     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
614     /* Now process the block-stash. Since the values are stashed column-oriented,
615        clear the roworiented flags (so the values are treated as column-oriented) and
616        restore the original flags after the values are inserted */
617     r1 = baij->roworiented;
618     r2 = a->roworiented;
619     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
620     baij->roworiented = PETSC_FALSE;
621     a->roworiented    = PETSC_FALSE;
622     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
623     while (1) {
624       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
625       if (!flg) break;
626 
627       for (i=0; i<n;) {
628         /* Now identify the consecutive vals belonging to the same row */
629         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
630         if (j < n) ncols = j-i;
631         else       ncols = n-i;
632         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
633         i = j;
634       }
635     }
636     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
637     baij->roworiented = r1;
638     a->roworiented    = r2;
639     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
640   }
641 
642   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
643   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
644 
645   /* determine if any processor has disassembled; if so we must
646      also disassemble ourselves, in order that we may reassemble. */
647   /*
648      if the nonzero structure of submatrix B cannot change then we know that
649      no processor disassembled, thus we can skip this step
650   */
651   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
652     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
653     if (mat->was_assembled && !other_disassembled) {
654       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
655     }
656   }
657 
658   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
659     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
660   }
661   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
662   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
663   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
664 
665   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
666   baij->rowvalues = 0;
667 
668   PetscFunctionReturn(0);
669 }
670 
671 extern PetscErrorCode MatSetValues_MPIBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
672 #undef __FUNCT__
673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
675 {
676   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
677   PetscErrorCode    ierr;
678   PetscInt          bs = mat->rmap.bs;
679   PetscMPIInt       size = baij->size,rank = baij->rank;
680   PetscTruth        iascii,isdraw;
681   PetscViewer       sviewer;
682   PetscViewerFormat format;
683 
684   PetscFunctionBegin;
685   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
687   if (iascii) {
688     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
689     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
690       MatInfo info;
691       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
692       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
694               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
695               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
697       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
698       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
699       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
700       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
702       PetscFunctionReturn(0);
703     } else if (format == PETSC_VIEWER_ASCII_INFO) {
704       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
705       PetscFunctionReturn(0);
706     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
707       PetscFunctionReturn(0);
708     }
709   }
710 
711   if (isdraw) {
712     PetscDraw  draw;
713     PetscTruth isnull;
714     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
715     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
716   }
717 
718   if (size == 1) {
719     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
720     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
721   } else {
722     /* assemble the entire matrix onto first processor. */
723     Mat          A;
724     Mat_SeqSBAIJ *Aloc;
725     Mat_SeqBAIJ  *Bloc;
726     PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
727     MatScalar    *a;
728 
729     /* Should this be the same type as mat? */
730     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
733     } else {
734       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
735     }
736     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
737     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
738     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
739 
740     /* copy over the A part */
741     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
742     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
743     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
744 
745     for (i=0; i<mbs; i++) {
746       rvals[0] = bs*(baij->rstartbs + i);
747       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
748       for (j=ai[i]; j<ai[i+1]; j++) {
749         col = (baij->cstartbs+aj[j])*bs;
750         for (k=0; k<bs; k++) {
751           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
752           col++; a += bs;
753         }
754       }
755     }
756     /* copy over the B part */
757     Bloc = (Mat_SeqBAIJ*)baij->B->data;
758     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
759     for (i=0; i<mbs; i++) {
760 
761       rvals[0] = bs*(baij->rstartbs + i);
762       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
763       for (j=ai[i]; j<ai[i+1]; j++) {
764         col = baij->garray[aj[j]]*bs;
765         for (k=0; k<bs; k++) {
766           ierr = MatSetValues_MPIBAIJ(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
767           col++; a += bs;
768         }
769       }
770     }
771     ierr = PetscFree(rvals);CHKERRQ(ierr);
772     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
773     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
774     /*
775        Every process has to make this call to draw the matrix since the graphics waits are
776        synchronized across all processors that share the PetscDraw object
777     */
778     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
779     if (!rank) {
780       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
781       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
782     }
783     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
784     ierr = MatDestroy(A);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 #undef __FUNCT__
790 #define __FUNCT__ "MatView_MPISBAIJ"
791 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
792 {
793   PetscErrorCode ierr;
794   PetscTruth     iascii,isdraw,issocket,isbinary;
795 
796   PetscFunctionBegin;
797   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
798   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
800   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
801   if (iascii || isdraw || issocket || isbinary) {
802     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
803   } else {
804     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatDestroy_MPISBAIJ"
811 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
812 {
813   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817 #if defined(PETSC_USE_LOG)
818   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
819 #endif
820   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
821   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
822   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
823   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
824 #if defined (PETSC_USE_CTABLE)
825   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
826 #else
827   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
828 #endif
829   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
830   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
831   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
832   if (baij->slvec0) {
833     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
834     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
835   }
836   if (baij->slvec1) {
837     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
838     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
839     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
840   }
841   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
842   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
843   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
844   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
845 #if defined(PETSC_USE_MAT_SINGLE)
846   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
847 #endif
848   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
849   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
850   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
851   ierr = PetscFree(baij);CHKERRQ(ierr);
852 
853   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
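/*
   Outline of MatMult_MPISBAIJ below (a summary comment of the scheme used in the code;
   see the code for the exact scatter layout): to form y = M*x with only the upper block
   triangle stored, the routine computes slvec1a = A*x and slvec0b = B^T*x locally,
   scatters slvec0 = [x ; B^T*x] into slvec1 = [A*x ; 0] with ADD_VALUES (delivering the
   B^T*x contributions to their owning processes and gathering the ghost values of x into
   slvec1b), and finally forms y = B*slvec1b + slvec1a.
*/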
860 #undef __FUNCT__
861 #define __FUNCT__ "MatMult_MPISBAIJ"
862 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
863 {
864   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
865   PetscErrorCode ierr;
866   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
867   PetscScalar    *x,*from,zero=0.0;
868 
869   PetscFunctionBegin;
870   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
871   if (nt != A->cmap.n) {
872     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
873   }
874 
875   /* diagonal part */
876   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
877   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
878 
879   /* subdiagonal part */
880   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
881   CHKMEMQ;
882   /* copy x into the vec slvec0 */
883   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
884   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
885   CHKMEMQ;
886   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
887   CHKMEMQ;
888   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
889 
890   CHKMEMQ;
891   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
892   CHKMEMQ;
893   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
894   CHKMEMQ;
895   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
896   CHKMEMQ;
897   /* superdiagonal part */
898   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
899   CHKMEMQ;
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
905 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
906 {
907   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
908   PetscErrorCode ierr;
909   PetscInt       nt;
910 
911   PetscFunctionBegin;
912   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
913   if (nt != A->cmap.n) {
914     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
915   }
916   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
917   if (nt != A->rmap.N) {
918     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
919   }
920 
921   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
922   /* do diagonal part */
923   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
924   /* do superdiagonal part */
925   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
926   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
927   /* do subdiagonal part */
928   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
929   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
930   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
931 
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
937 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
938 {
939   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
940   PetscErrorCode ierr;
941   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
942   PetscScalar    *x,*from,zero=0.0;
943 
944   PetscFunctionBegin;
945   /*
946   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
947   PetscSynchronizedFlush(A->comm);
948   */
949   /* diagonal part */
950   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
951   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
952 
953   /* subdiagonal part */
954   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
955 
956   /* copy x into the vec slvec0 */
957   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
958   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
959   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
960   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
961 
962   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
965 
966   /* superdiagonal part */
967   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
968 
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
974 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
975 {
976   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
977   PetscErrorCode ierr;
978 
979   PetscFunctionBegin;
980   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
981   /* do diagonal part */
982   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
983   /* do superdiagonal part */
984   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
985   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
986 
987   /* do subdiagonal part */
988   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
989   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
990   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
991 
992   PetscFunctionReturn(0);
993 }
994 
995 /*
996   This only works correctly for square matrices where the subblock A->A is the
997    diagonal block
998 */
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1001 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1002 {
1003   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1008   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatScale_MPISBAIJ"
1014 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1015 {
1016   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1021   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1027 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1028 {
1029   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1030   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1031   PetscErrorCode ierr;
1032   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1033   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1034   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1035 
1036   PetscFunctionBegin;
1037   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1038   mat->getrowactive = PETSC_TRUE;
1039 
1040   if (!mat->rowvalues && (idx || v)) {
1041     /*
1042         allocate enough space to hold information from the longest row.
1043     */
1044     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1045     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1046     PetscInt     max = 1,mbs = mat->mbs,tmp;
1047     for (i=0; i<mbs; i++) {
1048       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1049       if (max < tmp) { max = tmp; }
1050     }
1051     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1052     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1053   }
1054 
1055   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1056   lrow = row - brstart;  /* local row index */
1057 
1058   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1059   if (!v)   {pvA = 0; pvB = 0;}
1060   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1061   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1062   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1063   nztot = nzA + nzB;
1064 
1065   cmap  = mat->garray;
1066   if (v  || idx) {
1067     if (nztot) {
1068       /* Sort by increasing column numbers, assuming A and B already sorted */
1069       PetscInt imark = -1;
1070       if (v) {
1071         *v = v_p = mat->rowvalues;
1072         for (i=0; i<nzB; i++) {
1073           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1074           else break;
1075         }
1076         imark = i;
1077         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1078         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1079       }
1080       if (idx) {
1081         *idx = idx_p = mat->rowindices;
1082         if (imark > -1) {
1083           for (i=0; i<imark; i++) {
1084             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1085           }
1086         } else {
1087           for (i=0; i<nzB; i++) {
1088             if (cmap[cworkB[i]/bs] < cstart)
1089               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1090             else break;
1091           }
1092           imark = i;
1093         }
1094         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1095         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1096       }
1097     } else {
1098       if (idx) *idx = 0;
1099       if (v)   *v   = 0;
1100     }
1101   }
1102   *nz = nztot;
1103   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1104   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1110 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1111 {
1112   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1113 
1114   PetscFunctionBegin;
1115   if (!baij->getrowactive) {
1116     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1117   }
1118   baij->getrowactive = PETSC_FALSE;
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1124 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1125 {
1126   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1127   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1128 
1129   PetscFunctionBegin;
1130   aA->getrow_utriangular = PETSC_TRUE;
1131   PetscFunctionReturn(0);
1132 }
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1135 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1136 {
1137   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1138   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1139 
1140   PetscFunctionBegin;
1141   aA->getrow_utriangular = PETSC_FALSE;
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1147 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1148 {
1149   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1154   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1160 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1161 {
1162   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1163   PetscErrorCode ierr;
1164 
1165   PetscFunctionBegin;
1166   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1167   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1173 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1174 {
1175   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1176   PetscErrorCode ierr;
1177 
1178   PetscFunctionBegin;
1179   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1180   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1186 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1187 {
1188   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1189   Mat            A = a->A,B = a->B;
1190   PetscErrorCode ierr;
1191   PetscReal      isend[5],irecv[5];
1192 
1193   PetscFunctionBegin;
1194   info->block_size     = (PetscReal)matin->rmap.bs;
1195   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1196   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1197   isend[3] = info->memory;  isend[4] = info->mallocs;
1198   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1199   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1200   isend[3] += info->memory;  isend[4] += info->mallocs;
1201   if (flag == MAT_LOCAL) {
1202     info->nz_used      = isend[0];
1203     info->nz_allocated = isend[1];
1204     info->nz_unneeded  = isend[2];
1205     info->memory       = isend[3];
1206     info->mallocs      = isend[4];
1207   } else if (flag == MAT_GLOBAL_MAX) {
1208     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1209     info->nz_used      = irecv[0];
1210     info->nz_allocated = irecv[1];
1211     info->nz_unneeded  = irecv[2];
1212     info->memory       = irecv[3];
1213     info->mallocs      = irecv[4];
1214   } else if (flag == MAT_GLOBAL_SUM) {
1215     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1216     info->nz_used      = irecv[0];
1217     info->nz_allocated = irecv[1];
1218     info->nz_unneeded  = irecv[2];
1219     info->memory       = irecv[3];
1220     info->mallocs      = irecv[4];
1221   } else {
1222     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1223   }
1224   info->rows_global       = (PetscReal)A->rmap.N;
1225   info->columns_global    = (PetscReal)A->cmap.N;
1226   info->rows_local        = (PetscReal)A->rmap.N;
1227   info->columns_local     = (PetscReal)A->cmap.N;
1228   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1229   info->fill_ratio_needed = 0;
1230   info->factor_mallocs    = 0;
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1236 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1237 {
1238   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1239   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   switch (op) {
1244   case MAT_NO_NEW_NONZERO_LOCATIONS:
1245   case MAT_YES_NEW_NONZERO_LOCATIONS:
1246   case MAT_COLUMNS_UNSORTED:
1247   case MAT_COLUMNS_SORTED:
1248   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1249   case MAT_KEEP_ZEROED_ROWS:
1250   case MAT_NEW_NONZERO_LOCATION_ERR:
1251     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1252     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1253     break;
1254   case MAT_ROW_ORIENTED:
1255     a->roworiented = PETSC_TRUE;
1256     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1257     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1258     break;
1259   case MAT_ROWS_SORTED:
1260   case MAT_ROWS_UNSORTED:
1261   case MAT_YES_NEW_DIAGONALS:
1262     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1263     break;
1264   case MAT_COLUMN_ORIENTED:
1265     a->roworiented = PETSC_FALSE;
1266     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1267     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1268     break;
1269   case MAT_IGNORE_OFF_PROC_ENTRIES:
1270     a->donotstash = PETSC_TRUE;
1271     break;
1272   case MAT_NO_NEW_DIAGONALS:
1273     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1274   case MAT_USE_HASH_TABLE:
1275     a->ht_flag = PETSC_TRUE;
1276     break;
1277   case MAT_NOT_SYMMETRIC:
1278   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1279   case MAT_HERMITIAN:
1280     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1281   case MAT_SYMMETRIC:
1282   case MAT_STRUCTURALLY_SYMMETRIC:
1283   case MAT_NOT_HERMITIAN:
1284   case MAT_SYMMETRY_ETERNAL:
1285   case MAT_NOT_SYMMETRY_ETERNAL:
1286     ierr = PetscInfo1(A,"Option %s ignored\n",MatOptions[op]);CHKERRQ(ierr);
1287     break;
1288   case MAT_IGNORE_LOWER_TRIANGULAR:
1289     aA->ignore_ltriangular = PETSC_TRUE;
1290     break;
1291   case MAT_ERROR_LOWER_TRIANGULAR:
1292     aA->ignore_ltriangular = PETSC_FALSE;
1293     break;
1294   case MAT_GETROW_UPPERTRIANGULAR:
1295     aA->getrow_utriangular = PETSC_TRUE;
1296     break;
1297   default:
1298     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1299   }
1300   PetscFunctionReturn(0);
1301 }
1302 
1303 #undef __FUNCT__
1304 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1305 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1306 {
1307   PetscErrorCode ierr;
1308   PetscFunctionBegin;
1309   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1315 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1316 {
1317   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1318   Mat            a=baij->A, b=baij->B;
1319   PetscErrorCode ierr;
1320   PetscInt       nv,m,n;
1321   PetscTruth     flg;
1322 
1323   PetscFunctionBegin;
1324   if (ll != rr){
1325     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1326     if (!flg)
1327       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1328   }
1329   if (!ll) PetscFunctionReturn(0);
1330 
1331   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1332   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1333 
1334   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1335   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1336 
1337   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1338 
1339   /* left diagonalscale the off-diagonal part */
1340   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1341 
1342   /* scale the diagonal part */
1343   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1344 
1345   /* right diagonalscale the off-diagonal part */
1346   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1347   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1353 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1354 {
1355   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1364 
1365 #undef __FUNCT__
1366 #define __FUNCT__ "MatEqual_MPISBAIJ"
1367 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1368 {
1369   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1370   Mat            a,b,c,d;
1371   PetscTruth     flg;
1372   PetscErrorCode ierr;
1373 
1374   PetscFunctionBegin;
1375   a = matA->A; b = matA->B;
1376   c = matB->A; d = matB->B;
1377 
1378   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1379   if (flg) {
1380     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1381   }
1382   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 #undef __FUNCT__
1387 #define __FUNCT__ "MatCopy_MPISBAIJ"
1388 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1389 {
1390   PetscErrorCode ierr;
1391   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1392   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1393 
1394   PetscFunctionBegin;
1395   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1396   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1397     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1398     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1399     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1400   } else {
1401     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1402     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1403   }
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 #undef __FUNCT__
1408 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1409 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1410 {
1411   PetscErrorCode ierr;
1412 
1413   PetscFunctionBegin;
1414   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1415   PetscFunctionReturn(0);
1416 }
1417 
1418 #include "petscblaslapack.h"
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1421 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1422 {
1423   PetscErrorCode ierr;
1424   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1425   PetscBLASInt   bnz,one=1;
1426   Mat_SeqSBAIJ   *xa,*ya;
1427   Mat_SeqBAIJ    *xb,*yb;
1428 
1429   PetscFunctionBegin;
1430   if (str == SAME_NONZERO_PATTERN) {
1431     PetscScalar alpha = a;
1432     xa = (Mat_SeqSBAIJ *)xx->A->data;
1433     ya = (Mat_SeqSBAIJ *)yy->A->data;
1434     bnz = (PetscBLASInt)xa->nz;
1435     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1436     xb = (Mat_SeqBAIJ *)xx->B->data;
1437     yb = (Mat_SeqBAIJ *)yy->B->data;
1438     bnz = (PetscBLASInt)xb->nz;
1439     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1440   } else {
1441     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1442     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1443     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1444   }
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1450 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1451 {
1452   PetscErrorCode ierr;
1453   PetscInt       i;
1454   PetscTruth     flg;
1455 
1456   PetscFunctionBegin;
1457   for (i=0; i<n; i++) {
1458     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1459     if (!flg) {
1460       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1461     }
1462   }
1463   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 
1468 /* -------------------------------------------------------------------*/
1469 static struct _MatOps MatOps_Values = {
1470        MatSetValues_MPISBAIJ,
1471        MatGetRow_MPISBAIJ,
1472        MatRestoreRow_MPISBAIJ,
1473        MatMult_MPISBAIJ,
1474 /* 4*/ MatMultAdd_MPISBAIJ,
1475        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1476        MatMultAdd_MPISBAIJ,
1477        0,
1478        0,
1479        0,
1480 /*10*/ 0,
1481        0,
1482        0,
1483        MatRelax_MPISBAIJ,
1484        MatTranspose_MPISBAIJ,
1485 /*15*/ MatGetInfo_MPISBAIJ,
1486        MatEqual_MPISBAIJ,
1487        MatGetDiagonal_MPISBAIJ,
1488        MatDiagonalScale_MPISBAIJ,
1489        MatNorm_MPISBAIJ,
1490 /*20*/ MatAssemblyBegin_MPISBAIJ,
1491        MatAssemblyEnd_MPISBAIJ,
1492        0,
1493        MatSetOption_MPISBAIJ,
1494        MatZeroEntries_MPISBAIJ,
1495 /*25*/ 0,
1496        0,
1497        0,
1498        0,
1499        0,
1500 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1501        0,
1502        0,
1503        0,
1504        0,
1505 /*35*/ MatDuplicate_MPISBAIJ,
1506        0,
1507        0,
1508        0,
1509        0,
1510 /*40*/ MatAXPY_MPISBAIJ,
1511        MatGetSubMatrices_MPISBAIJ,
1512        MatIncreaseOverlap_MPISBAIJ,
1513        MatGetValues_MPISBAIJ,
1514        MatCopy_MPISBAIJ,
1515 /*45*/ 0,
1516        MatScale_MPISBAIJ,
1517        0,
1518        0,
1519        0,
1520 /*50*/ 0,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*55*/ 0,
1526        0,
1527        MatSetUnfactored_MPISBAIJ,
1528        0,
1529        MatSetValuesBlocked_MPISBAIJ,
1530 /*60*/ 0,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*65*/ 0,
1536        0,
1537        0,
1538        0,
1539        0,
1540 /*70*/ MatGetRowMax_MPISBAIJ,
1541        0,
1542        0,
1543        0,
1544        0,
1545 /*75*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*80*/ 0,
1551        0,
1552        0,
1553        0,
1554        MatLoad_MPISBAIJ,
1555 /*85*/ 0,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*90*/ 0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*95*/ 0,
1566        0,
1567        0,
1568        0,
1569        0,
1570 /*100*/0,
1571        0,
1572        0,
1573        0,
1574        0,
1575 /*105*/0,
1576        MatRealPart_MPISBAIJ,
1577        MatImaginaryPart_MPISBAIJ,
1578        MatGetRowUpperTriangular_MPISBAIJ,
1579        MatRestoreRowUpperTriangular_MPISBAIJ
1580 };
1581 
1582 
1583 EXTERN_C_BEGIN
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1586 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1587 {
1588   PetscFunctionBegin;
1589   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1590   *iscopy = PETSC_FALSE;
1591   PetscFunctionReturn(0);
1592 }
1593 EXTERN_C_END
1594 
1595 EXTERN_C_BEGIN
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1598 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1599 {
1600   Mat_MPISBAIJ   *b;
1601   PetscErrorCode ierr;
1602   PetscInt       i,mbs,Mbs;
1603 
1604   PetscFunctionBegin;
1605   ierr = PetscOptionsBegin(B->comm,B->prefix,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1606     ierr = PetscOptionsInt("-mat_block_size","Set the blocksize used to store the matrix","MatMPISBAIJSetPreallocation",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
1607   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1608 
1609   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1610   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1611   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1612   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1613   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1614 
1615   B->rmap.bs = B->cmap.bs = bs;
1616   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1617   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1618 
1619   if (d_nnz) {
1620     for (i=0; i<B->rmap.n/bs; i++) {
1621       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1622     }
1623   }
1624   if (o_nnz) {
1625     for (i=0; i<B->rmap.n/bs; i++) {
1626       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1627     }
1628   }
1629   B->preallocated = PETSC_TRUE;
1630 
1631   b   = (Mat_MPISBAIJ*)B->data;
1632   mbs = B->rmap.n/bs;
1633   Mbs = B->rmap.N/bs;
1634   if (mbs*bs != B->rmap.n) {
1635     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
1636   }
1637 
1638   B->rmap.bs  = bs;
1639   b->bs2 = bs*bs;
1640   b->mbs = mbs;
1641   b->nbs = mbs;
1642   b->Mbs = Mbs;
1643   b->Nbs = Mbs;
1644 
1645   for (i=0; i<=b->size; i++) {
1646     b->rangebs[i] = B->rmap.range[i]/bs;
1647   }
1648   b->rstartbs = B->rmap.rstart/bs;
1649   b->rendbs   = B->rmap.rend/bs;
1650 
1651   b->cstartbs = B->cmap.rstart/bs;
1652   b->cendbs   = B->cmap.rend/bs;
1653 
1654   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1655   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1656   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1657   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1658   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1659 
1660   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1661   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1662   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1663   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1664   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1665 
1666   /* build a stash to cache entries destined for other processes */
1667   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1668 
1669   PetscFunctionReturn(0);
1670 }
1671 EXTERN_C_END
1672 
1673 /*MC
1674    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1675    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1676 
1677    Options Database Keys:
1678 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1679 
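   Example usage (a hedged sketch only; the sizes, block size, and preallocation
   values below are illustrative placeholders, not values taken from this file):
.vb
     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
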
1680   Level: beginner
1681 
1682 .seealso: MatCreateMPISBAIJ
1683 M*/
1684 
1685 EXTERN_C_BEGIN
1686 #undef __FUNCT__
1687 #define __FUNCT__ "MatCreate_MPISBAIJ"
1688 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1689 {
1690   Mat_MPISBAIJ   *b;
1691   PetscErrorCode ierr;
1692   PetscTruth     flg;
1693 
1694   PetscFunctionBegin;
1695 
1696   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1697   B->data = (void*)b;
1698   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1699 
1700   B->ops->destroy    = MatDestroy_MPISBAIJ;
1701   B->ops->view       = MatView_MPISBAIJ;
1702   B->mapping    = 0;
1703   B->factor     = 0;
1704   B->assembled  = PETSC_FALSE;
1705 
1706   B->insertmode = NOT_SET_VALUES;
1707   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1708   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1709 
1710   /* build local table of row and column ownerships */
1711   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1712 
1713   /* build a stash to cache entries destined for other processes */
1714   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1715   b->donotstash  = PETSC_FALSE;
1716   b->colmap      = PETSC_NULL;
1717   b->garray      = PETSC_NULL;
1718   b->roworiented = PETSC_TRUE;
1719 
1720 #if defined(PETSC_USE_MAT_SINGLE)
1721   /* stuff for MatSetValues_XXX in single precision */
1722   b->setvalueslen     = 0;
1723   b->setvaluescopy    = PETSC_NULL;
1724 #endif
1725 
1726   /* stuff used in block assembly */
1727   b->barray       = 0;
1728 
1729   /* stuff used for matrix vector multiply */
1730   b->lvec         = 0;
1731   b->Mvctx        = 0;
1732   b->slvec0       = 0;
1733   b->slvec0b      = 0;
1734   b->slvec1       = 0;
1735   b->slvec1a      = 0;
1736   b->slvec1b      = 0;
1737   b->sMvctx       = 0;
1738 
1739   /* stuff for MatGetRow() */
1740   b->rowindices   = 0;
1741   b->rowvalues    = 0;
1742   b->getrowactive = PETSC_FALSE;
1743 
1744   /* hash table stuff */
1745   b->ht           = 0;
1746   b->hd           = 0;
1747   b->ht_size      = 0;
1748   b->ht_flag      = PETSC_FALSE;
1749   b->ht_fact      = 0;
1750   b->ht_total_ct  = 0;
1751   b->ht_insert_ct = 0;
1752 
1753   b->in_loc       = 0;
1754   b->v_loc        = 0;
1755   b->n_loc        = 0;
1756   ierr = PetscOptionsBegin(B->comm,PETSC_NULL,"Options for MPISBAIJ matrix","Mat");CHKERRQ(ierr);
1757     ierr = PetscOptionsTruth("-mat_use_hash_table","Use hash table to save memory in constructing matrix","MatSetOption",PETSC_FALSE,&flg,PETSC_NULL);CHKERRQ(ierr);
1758     if (flg) {
1759       PetscReal fact = 1.39;
1760       ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1761       ierr = PetscOptionsReal("-mat_use_hash_table","Use hash table factor","MatMPIBAIJSetHashTableFactor",fact,&fact,PETSC_NULL);CHKERRQ(ierr);
1762       if (fact <= 1.0) fact = 1.39;
1763       ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1764       ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1765     }
1766   ierr = PetscOptionsEnd();CHKERRQ(ierr);
1767 
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1769                                      "MatStoreValues_MPISBAIJ",
1770                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1772                                      "MatRetrieveValues_MPISBAIJ",
1773                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1775                                      "MatGetDiagonalBlock_MPISBAIJ",
1776                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1778                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1779                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1780   B->symmetric                  = PETSC_TRUE;
1781   B->structurally_symmetric     = PETSC_TRUE;
1782   B->symmetric_set              = PETSC_TRUE;
1783   B->structurally_symmetric_set = PETSC_TRUE;
1784   ierr = PetscObjectChangeTypeName((PetscObject)B,MATMPISBAIJ);CHKERRQ(ierr);
1785   PetscFunctionReturn(0);
1786 }
1787 EXTERN_C_END
1788 
1789 /*MC
1790    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1791 
1792    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1793    and MATMPISBAIJ otherwise.
1794 
1795    Options Database Keys:
1796 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1797 
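   Example usage (a hedged sketch of run-time type selection; the sizes are
   illustrative placeholders, not values taken from this file):
.vb
     Mat A;
     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
     ierr = MatSetFromOptions(A);CHKERRQ(ierr);  /* picks up -mat_type sbaij from the options database */
.ve
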
1798   Level: beginner
1799 
1800 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1801 M*/
1802 
1803 EXTERN_C_BEGIN
1804 #undef __FUNCT__
1805 #define __FUNCT__ "MatCreate_SBAIJ"
1806 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1807 {
1808   PetscErrorCode ierr;
1809   PetscMPIInt    size;
1810 
1811   PetscFunctionBegin;
1812   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1813   if (size == 1) {
1814     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1815   } else {
1816     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1817   }
1818   PetscFunctionReturn(0);
1819 }
1820 EXTERN_C_END
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1824 /*@C
1825    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1826    the user should preallocate the matrix storage by setting the parameters
1827    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1828    performance can be increased by more than a factor of 50.
1829 
1830    Collective on Mat
1831 
1832    Input Parameters:
1833 +  A - the matrix
1834 .  bs   - size of block
1835 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1836            submatrix  (same for all local rows)
1837 .  d_nnz - array containing the number of block nonzeros in the various block rows
1838            in the upper triangular and diagonal part of the diagonal portion of the local
1839            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1840            for the diagonal entry even if it is zero.
1841 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1842            submatrix (same for all local rows).
1843 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1844            off-diagonal portion of the local submatrix (possibly different for
1845            each block row) or PETSC_NULL.
1846 
1847 
1848    Options Database Keys:
1849 +   -mat_no_unroll - uses code that does not unroll the loops in the
1850                      block calculations (much slower)
1851 -   -mat_block_size - size of the blocks to use
1852 
1853    Notes:
1854 
1855    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1856    then it must be used on all processors that share the object for that argument.
1857 
1858    If the *_nnz parameter is given then the *_nz parameter is ignored
1859 
1860    Storage Information:
1861    For a square global matrix we define each processor's diagonal portion
1862    to be its local rows and the corresponding columns (a square submatrix);
1863    each processor's off-diagonal portion encompasses the remainder of the
1864    local matrix (a rectangular submatrix).
1865 
1866    The user can specify preallocated storage for the diagonal part of
1867    the local submatrix with either d_nz or d_nnz (not both).  Set
1868    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1869    memory allocation.  Likewise, specify preallocated storage for the
1870    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1871 
1872    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1873    the figure below we depict these three local rows and all columns (0-11).
1874 
1875 .vb
1876            0 1 2 3 4 5 6 7 8 9 10 11
1877           -------------------
1878    row 3  |  o o o d d d o o o o o o
1879    row 4  |  o o o d d d o o o o o o
1880    row 5  |  o o o d d d o o o o o o
1881           -------------------
1882 .ve
1883 
1884    Thus, any entries in the d locations are stored in the d (diagonal)
1885    submatrix, and any entries in the o locations are stored in the
1886    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1887    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1888    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1889    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1890    plus the diagonal part of the d matrix,
1891    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1892    In general, for PDE problems in which most nonzeros are near the diagonal,
1893    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1894    or you will get TERRIBLE performance; see the users' manual chapter on
1895    matrices.
1896 
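   Example usage (a minimal sketch for illustration only; the block size and the
   counts in d_nnz and o_nnz below are made-up values, not taken from this file):
.vb
     Mat      A;
     PetscInt bs = 2, d_nnz[5] = {3,2,2,1,1}, o_nnz[5] = {1,1,1,2,2};

     ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
     ierr = MatSetSizes(A,5*bs,5*bs,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
     ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);  /* d_nnz/o_nnz take precedence over d_nz/o_nz */
.ve
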
1897    Level: intermediate
1898 
1899 .keywords: matrix, block, aij, compressed row, sparse, parallel
1900 
1901 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1902 @*/
1903 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1904 {
1905   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1906 
1907   PetscFunctionBegin;
1908   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1909   if (f) {
1910     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1911   }
1912   PetscFunctionReturn(0);
1913 }
1914 
1915 #undef __FUNCT__
1916 #define __FUNCT__ "MatCreateMPISBAIJ"
1917 /*@C
1918    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1919    (block compressed row).  For good matrix assembly performance
1920    the user should preallocate the matrix storage by setting the parameters
1921    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1922    performance can be increased by more than a factor of 50.
1923 
1924    Collective on MPI_Comm
1925 
1926    Input Parameters:
1927 +  comm - MPI communicator
1928 .  bs   - size of block
1929 .  m - number of local rows (or PETSC_DECIDE to have it calculated if M is given)
1930            This value should be the same as the local size used in creating the
1931            y vector for the matrix-vector product y = Ax.
1932 .  n - number of local columns (or PETSC_DECIDE to have it calculated if N is given)
1933            This value should be the same as the local size used in creating the
1934            x vector for the matrix-vector product y = Ax.
1935 .  M - number of global rows (or PETSC_DETERMINE to have it calculated if m is given)
1936 .  N - number of global columns (or PETSC_DETERMINE to have it calculated if n is given)
1937 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1938            submatrix  (same for all local rows)
1939 .  d_nnz - array containing the number of block nonzeros in the various block rows
1940            in the upper triangular and diagonal part of the diagonal portion of the local
1941            submatrix (possibly different for each block row) or PETSC_NULL.
1942            You must leave room for the diagonal entry even if it is zero.
1943 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1944            submatrix (same for all local rows).
1945 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1946            off-diagonal portion of the local submatrix (possibly different for
1947            each block row) or PETSC_NULL.
1948 
1949    Output Parameter:
1950 .  A - the matrix
1951 
1952    Options Database Keys:
1953 +   -mat_no_unroll - uses code that does not unroll the loops in the
1954                      block calculations (much slower)
1955 .   -mat_block_size - size of the blocks to use
1956 -   -mat_mpi - use the parallel matrix data structures even on one processor
1957                (defaults to using SeqSBAIJ format on one processor)
1958 
1959    Notes:
1960    The number of rows and columns must be divisible by blocksize.
1961 
1962    The user MUST specify either the local or global matrix dimensions
1963    (possibly both).
1964 
1965    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1966    then it must be used on all processors that share the object for that argument.
1967 
1968    If the *_nnz parameter is given then the *_nz parameter is ignored
1969 
1970    Storage Information:
1971    For a square global matrix we define each processor's diagonal portion
1972    to be its local rows and the corresponding columns (a square submatrix);
1973    each processor's off-diagonal portion encompasses the remainder of the
1974    local matrix (a rectangular submatrix).
1975 
1976    The user can specify preallocated storage for the diagonal part of
1977    the local submatrix with either d_nz or d_nnz (not both).  Set
1978    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1979    memory allocation.  Likewise, specify preallocated storage for the
1980    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1981 
1982    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1983    the figure below we depict these three local rows and all columns (0-11).
1984 
1985 .vb
1986            0 1 2 3 4 5 6 7 8 9 10 11
1987           -------------------
1988    row 3  |  o o o d d d o o o o o o
1989    row 4  |  o o o d d d o o o o o o
1990    row 5  |  o o o d d d o o o o o o
1991           -------------------
1992 .ve
1993 
1994    Thus, any entries in the d locations are stored in the d (diagonal)
1995    submatrix, and any entries in the o locations are stored in the
1996    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1997    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1998 
1999    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2000    plus the diagonal part of the d matrix,
2001    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2002    In general, for PDE problems in which most nonzeros are near the diagonal,
2003    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2004    or you will get TERRIBLE performance; see the users' manual chapter on
2005    matrices.
2006 
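   Example usage (a minimal sketch; the communicator, block size, global sizes and
   nonzero estimates are illustrative placeholders, not values from this file):
.vb
     Mat A;
     ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                              5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
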
2007    Level: intermediate
2008 
2009 .keywords: matrix, block, aij, compressed row, sparse, parallel
2010 
2011 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2012 @*/
2013 
2014 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2015 {
2016   PetscErrorCode ierr;
2017   PetscMPIInt    size;
2018 
2019   PetscFunctionBegin;
2020   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2021   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2022   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2023   if (size > 1) {
2024     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2025     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2026   } else {
2027     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2028     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2029   }
2030   PetscFunctionReturn(0);
2031 }
2032 
2033 
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2036 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2037 {
2038   Mat            mat;
2039   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2040   PetscErrorCode ierr;
2041   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2042   PetscScalar    *array;
2043 
2044   PetscFunctionBegin;
2045   *newmat       = 0;
2046   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2047   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2048   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2049   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2050   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2051   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2052 
2053   mat->factor       = matin->factor;
2054   mat->preallocated = PETSC_TRUE;
2055   mat->assembled    = PETSC_TRUE;
2056   mat->insertmode   = NOT_SET_VALUES;
2057 
2058   a = (Mat_MPISBAIJ*)mat->data;
2059   a->bs2   = oldmat->bs2;
2060   a->mbs   = oldmat->mbs;
2061   a->nbs   = oldmat->nbs;
2062   a->Mbs   = oldmat->Mbs;
2063   a->Nbs   = oldmat->Nbs;
2064 
2065 
2066   a->size         = oldmat->size;
2067   a->rank         = oldmat->rank;
2068   a->donotstash   = oldmat->donotstash;
2069   a->roworiented  = oldmat->roworiented;
2070   a->rowindices   = 0;
2071   a->rowvalues    = 0;
2072   a->getrowactive = PETSC_FALSE;
2073   a->barray       = 0;
2074   a->rstartbs    = oldmat->rstartbs;
2075   a->rendbs      = oldmat->rendbs;
2076   a->cstartbs    = oldmat->cstartbs;
2077   a->cendbs      = oldmat->cendbs;
2078 
2079   /* hash table stuff */
2080   a->ht           = 0;
2081   a->hd           = 0;
2082   a->ht_size      = 0;
2083   a->ht_flag      = oldmat->ht_flag;
2084   a->ht_fact      = oldmat->ht_fact;
2085   a->ht_total_ct  = 0;
2086   a->ht_insert_ct = 0;
2087 
2088   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2089   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2090   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2091   if (oldmat->colmap) {
2092 #if defined (PETSC_USE_CTABLE)
2093     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2094 #else
2095     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2096     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2097     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2098 #endif
2099   } else a->colmap = 0;
2100 
2101   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2102     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2103     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2104     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2105   } else a->garray = 0;
2106 
2107   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2108   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2109   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2110   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2111 
2112   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2113   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2114   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2115   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2116 
2117   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2118   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2119   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2120   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2121   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2122   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2123   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2124   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2125   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2126   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2127   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2128   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2129   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2130 
2131   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2132   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2133   a->sMvctx = oldmat->sMvctx;
2134   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2135 
2136   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2137   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2138   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2139   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2140   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2141   *newmat = mat;
2142   PetscFunctionReturn(0);
2143 }
2144 
2145 #include "petscsys.h"
2146 
2147 #undef __FUNCT__
2148 #define __FUNCT__ "MatLoad_MPISBAIJ"
2149 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2150 {
2151   Mat            A;
2152   PetscErrorCode ierr;
2153   PetscInt       i,nz,j,rstart,rend;
2154   PetscScalar    *vals,*buf;
2155   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2156   MPI_Status     status;
2157   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2158   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2159   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2160   PetscInt       bs=1,Mbs,mbs,extra_rows;
2161   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2162   PetscInt       dcount,kmax,k,nzcount,tmp;
2163   int            fd;
2164 
2165   PetscFunctionBegin;
2166   ierr = PetscOptionsBegin(comm,PETSC_NULL,"Options for loading MPISBAIJ matrix","Mat");CHKERRQ(ierr);
2167     ierr = PetscOptionsInt("-matload_block_size","Set the blocksize used to store the matrix","MatLoad",bs,&bs,PETSC_NULL);CHKERRQ(ierr);
2168   ierr = PetscOptionsEnd();CHKERRQ(ierr);
2169 
2170   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2171   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2172   if (!rank) {
2173     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2174     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2175     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2176     if (header[3] < 0) {
2177       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2178     }
2179   }
2180 
2181   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2182   M = header[1]; N = header[2];
2183 
2184   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2185 
2186   /*
2187      This code adds extra rows to make sure the number of rows is
2188      divisible by the blocksize
2189   */
2190   Mbs        = M/bs;
2191   extra_rows = bs - M + bs*(Mbs);
2192   if (extra_rows == bs) extra_rows = 0;
2193   else                  Mbs++;
2194   if (extra_rows && !rank) {
2195     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2196   }
2197 
2198   /* determine ownership of all rows */
2199   mbs        = Mbs/size + ((Mbs % size) > rank);
2200   m          = mbs*bs;
2201   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2202   browners   = rowners + size + 1;
2203   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2204   rowners[0] = 0;
2205   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2206   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2207   rstart = rowners[rank];
2208   rend   = rowners[rank+1];
2209 
2210   /* distribute row lengths to all processors */
2211   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2212   if (!rank) {
2213     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2214     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2215     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2216     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2217     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2218     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2219     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2220   } else {
2221     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2222   }
2223 
2224   if (!rank) {   /* procs[0] */
2225     /* calculate the number of nonzeros on each processor */
2226     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2227     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2228     for (i=0; i<size; i++) {
2229       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2230         procsnz[i] += rowlengths[j];
2231       }
2232     }
2233     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2234 
2235     /* determine max buffer needed and allocate it */
2236     maxnz = 0;
2237     for (i=0; i<size; i++) {
2238       maxnz = PetscMax(maxnz,procsnz[i]);
2239     }
2240     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2241 
2242     /* read in my part of the matrix column indices  */
2243     nz     = procsnz[0];
2244     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2245     mycols = ibuf;
2246     if (size == 1)  nz -= extra_rows;
2247     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2248     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2249 
2250     /* read in the parts for all other processors (except the last one) and ship them off */
2251     for (i=1; i<size-1; i++) {
2252       nz   = procsnz[i];
2253       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2254       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2255     }
2256     /* read in the stuff for the last proc */
2257     if (size != 1) {
2258       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2259       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2260       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2261       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2262     }
2263     ierr = PetscFree(cols);CHKERRQ(ierr);
2264   } else {  /* procs[i], i>0 */
2265     /* determine buffer space needed for message */
2266     nz = 0;
2267     for (i=0; i<m; i++) {
2268       nz += locrowlens[i];
2269     }
2270     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2271     mycols = ibuf;
2272     /* receive message of column indices */
2273     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2274     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2275     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2276   }
2277 
2278   /* loop over local rows, determining number of off diagonal entries */
2279   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2280   odlens   = dlens + (rend-rstart);
2281   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2282   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2283   masked1  = mask    + Mbs;
2284   masked2  = masked1 + Mbs;
2285   rowcount = 0; nzcount = 0;
2286   for (i=0; i<mbs; i++) {
2287     dcount  = 0;
2288     odcount = 0;
2289     for (j=0; j<bs; j++) {
2290       kmax = locrowlens[rowcount];
2291       for (k=0; k<kmax; k++) {
2292         tmp = mycols[nzcount++]/bs; /* block col. index */
2293         if (!mask[tmp]) {
2294           mask[tmp] = 1;
2295           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2296           else masked1[dcount++] = tmp; /* entry in diag portion */
2297         }
2298       }
2299       rowcount++;
2300     }
2301 
2302     dlens[i]  = dcount;  /* d_nzz[i] */
2303     odlens[i] = odcount; /* o_nzz[i] */
2304 
2305     /* zero out the mask elements we set */
2306     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2307     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2308   }
2309 
2310   /* create our matrix */
2311   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2312   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2313   ierr = MatSetType(A,type);CHKERRQ(ierr);
2314   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2315   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2316 
2317   if (!rank) {
2318     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2319     /* read in my part of the matrix numerical values  */
2320     nz = procsnz[0];
2321     vals = buf;
2322     mycols = ibuf;
2323     if (size == 1)  nz -= extra_rows;
2324     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2325     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2326 
2327     /* insert into matrix */
2328     jj      = rstart*bs;
2329     for (i=0; i<m; i++) {
2330       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2331       mycols += locrowlens[i];
2332       vals   += locrowlens[i];
2333       jj++;
2334     }
2335 
2336     /* read in other processors (except the last one) and ship out */
2337     for (i=1; i<size-1; i++) {
2338       nz   = procsnz[i];
2339       vals = buf;
2340       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2341       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2342     }
2343     /* the last proc */
2344     if (size != 1){
2345       nz   = procsnz[i] - extra_rows;
2346       vals = buf;
2347       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2348       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2349       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2350     }
2351     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2352 
2353   } else {
2354     /* receive numeric values */
2355     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2356 
2357     /* receive message of values */
2358     vals   = buf;
2359     mycols = ibuf;
2360     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2361     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2362     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Something is wrong with the file");
2363 
2364     /* insert into matrix */
2365     jj      = rstart*bs;
2366     for (i=0; i<m; i++) {
2367       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2368       mycols += locrowlens[i];
2369       vals   += locrowlens[i];
2370       jj++;
2371     }
2372   }
2373 
2374   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2375   ierr = PetscFree(buf);CHKERRQ(ierr);
2376   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2377   ierr = PetscFree(rowners);CHKERRQ(ierr);
2378   ierr = PetscFree(dlens);CHKERRQ(ierr);
2379   ierr = PetscFree(mask);CHKERRQ(ierr);
2380   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2381   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2382   *newmat = A;
2383   PetscFunctionReturn(0);
2384 }
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2388 /*XXXXX@
2389    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2390 
2391    Input Parameters:
2392 .  mat  - the matrix
2393 .  fact - factor
2394 
2395    Collective on Mat
2396 
2397    Level: advanced
2398 
2399   Notes:
2400    This can also be set by the command line option: -mat_use_hash_table fact
2401 
2402 .keywords: matrix, hashtable, factor, HT
2403 
2404 .seealso: MatSetOption()
2405 @XXXXX*/
2406 
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2410 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2411 {
2412   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2413   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2414   PetscReal      atmp;
2415   PetscReal      *work,*svalues,*rvalues;
2416   PetscErrorCode ierr;
2417   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2418   PetscMPIInt    rank,size;
2419   PetscInt       *rowners_bs,dest,count,source;
2420   PetscScalar    *va;
2421   MatScalar      *ba;
2422   MPI_Status     stat;
2423 
2424   PetscFunctionBegin;
2425   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2426   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2427 
2428   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2429   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2430 
2431   bs   = A->rmap.bs;
2432   mbs  = a->mbs;
2433   Mbs  = a->Mbs;
2434   ba   = b->a;
2435   bi   = b->i;
2436   bj   = b->j;
2437 
2438   /* find ownerships */
2439   rowners_bs = A->rmap.range;
2440 
2441   /* each proc creates an array to be distributed */
2442   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2443   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2444 
2445   /* row_max for B */
2446   if (rank != size-1){
2447     for (i=0; i<mbs; i++) {
2448       ncols = bi[1] - bi[0]; bi++;
2449       brow  = bs*i;
2450       for (j=0; j<ncols; j++){
2451         bcol = bs*(*bj);
2452         for (kcol=0; kcol<bs; kcol++){
2453           col = bcol + kcol;                 /* local col index */
2454           col += rowners_bs[rank+1];      /* global col index */
2455           for (krow=0; krow<bs; krow++){
2456             atmp = PetscAbsScalar(*ba); ba++;
2457             row = brow + krow;    /* local row index */
2458             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2459             if (work[col] < atmp) work[col] = atmp;
2460           }
2461         }
2462         bj++;
2463       }
2464     }
2465 
2466     /* send values to its owners */
2467     for (dest=rank+1; dest<size; dest++){
2468       svalues = work + rowners_bs[dest];
2469       count   = rowners_bs[dest+1]-rowners_bs[dest];
2470       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2471     }
2472   }
2473 
2474   /* receive values */
2475   if (rank){
2476     rvalues = work;
2477     count   = rowners_bs[rank+1]-rowners_bs[rank];
2478     for (source=0; source<rank; source++){
2479       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2480       /* process values */
2481       for (i=0; i<count; i++){
2482         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2483       }
2484     }
2485   }
2486 
2487   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2488   ierr = PetscFree(work);CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatRelax_MPISBAIJ"
2494 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2495 {
2496   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2497   PetscErrorCode ierr;
2498   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2499   PetscScalar    *x,*b,*ptr,zero=0.0;
2500   Vec            bb1;
2501 
2502   PetscFunctionBegin;
2503   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2504   if (bs > 1)
2505     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2506 
2507   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2508     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2509       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2510       its--;
2511     }
2512 
2513     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2514     while (its--){
2515 
2516       /* lower triangular part: slvec0b = - B^T*xx */
2517       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2518 
2519       /* copy xx into slvec0a */
2520       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2521       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2522       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2523       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2524 
2525       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2526 
2527       /* copy bb into slvec1a */
2528       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2529       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2530       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2531       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2532 
2533       /* set slvec1b = 0 */
2534       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2535 
2536       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2537       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2538       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2539       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2540 
2541       /* upper triangular part: bb1 = bb1 - B*x */
2542       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2543 
2544       /* local diagonal sweep */
2545       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2546     }
2547     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2548   } else {
2549     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2550   }
2551   PetscFunctionReturn(0);
2552 }
2553 
2554 #undef __FUNCT__
2555 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2556 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2557 {
2558   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2559   PetscErrorCode ierr;
2560   Vec            lvec1,bb1;
2561 
2562   PetscFunctionBegin;
2563   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2564   if (matin->rmap.bs > 1)
2565     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2566 
2567   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2568     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2569       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2570       its--;
2571     }
2572 
2573     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2574     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2575     while (its--){
2576       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2577 
2578       /* lower triangular part: bb1 = bb - B^T*xx */
2579       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2580       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2581 
2582       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2583       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2584       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2585 
2586       /* upper triangular part: bb1 = bb1 - B*x */
2587       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2589 
2590       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2591 
2592       /* diagonal sweep */
2593       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2594     }
2595     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2596     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2597   } else {
2598     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2599   }
2600   PetscFunctionReturn(0);
2601 }
2602 
2603