xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision be1d678a52e6eff2808b2fa31ae986cdbf03c9fe)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements already in single (MatScalar)
27    precision and inserts them directly into the single precision data structures. The function
28    MatSetValuesBlocked_MPISBAIJ() converts the PetscScalar entries into single precision and
29    then calls ..._MatScalar() to put them into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
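/*
   A minimal stand-alone sketch (not part of the library) of the pattern these
   wrappers implement when PETSC_USE_MAT_SINGLE is defined: narrow the
   PetscScalar (double precision) values into a MatScalar (single precision)
   buffer, then forward that buffer to the ..._MatScalar() routine. The
   function name below is hypothetical.
*/
#if 0 /* illustrative sketch only; never compiled */
static void NarrowToMatScalar(const PetscScalar *v,MatScalar *vsingle,PetscInt N)
{
  PetscInt i;
  for (i=0; i<N; i++) vsingle[i] = (MatScalar)v[i]; /* precision is lost here */
}
#endif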
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
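/* CHUNKSIZE: number of extra block entries allocated whenever a row in the
   insertion macros below runs out of preallocated space */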
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
112         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
113         new_j = (PetscInt*)(new_a + bs2*new_nz); \
114         new_i = new_j + new_nz; \
115  \
116         /* copy over old data into new slots */ \
117         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
118         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
120         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
122         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
124         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
125                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
126         /* free up old matrix storage */ \
127         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
128         if (!a->singlemalloc) { \
129           ierr = PetscFree(a->i);CHKERRQ(ierr); \
130           ierr = PetscFree(a->j);CHKERRQ(ierr);\
131         } \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = PETSC_TRUE; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
154 #ifndef MatSetValues_SeqSBAIJ_B_Private
155 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
156 { \
157     brow = row/bs;  \
158     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
159     rmax = bimax[brow]; nrow = bilen[brow]; \
160       bcol = col/bs; \
161       ridx = row % bs; cidx = col % bs; \
162       low = 0; high = nrow; \
163       while (high-low > 3) { \
164         t = (low+high)/2; \
165         if (rp[t] > bcol) high = t; \
166         else              low  = t; \
167       } \
168       for (_i=low; _i<high; _i++) { \
169         if (rp[_i] > bcol) break; \
170         if (rp[_i] == bcol) { \
171           bap  = ap +  bs2*_i + bs*cidx + ridx; \
172           if (addv == ADD_VALUES) *bap += value;  \
173           else                    *bap  = value;  \
174           goto b_noinsert; \
175         } \
176       } \
177       if (b->nonew == 1) goto b_noinsert; \
178       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
179       if (nrow >= rmax) { \
180         /* there is no extra room in row, therefore enlarge */ \
181         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
182         MatScalar *new_a; \
183  \
184         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
185  \
186         /* malloc new storage space */ \
187         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
188         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
189         new_j = (PetscInt*)(new_a + bs2*new_nz); \
190         new_i = new_j + new_nz; \
191  \
192         /* copy over old data into new slots */ \
193         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
194         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
195         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
196         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
197         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
198         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
201                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
202         /* free up old matrix storage */ \
203         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
204         if (!b->singlemalloc) { \
205           ierr = PetscFree(b->i);CHKERRQ(ierr); \
206           ierr = PetscFree(b->j);CHKERRQ(ierr); \
207         } \
208         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
209         b->singlemalloc = PETSC_TRUE; \
210  \
211         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
212         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
213         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
214         b->maxnz += bs2*CHUNKSIZE; \
215         b->reallocs++; \
216         b->nz++; \
217       } \
218       N = nrow++ - 1;  \
219       /* shift up all the later entries in this row */ \
220       for (ii=N; ii>=_i; ii--) { \
221         rp[ii+1] = rp[ii]; \
222         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223       } \
224       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
225       rp[_i]                      = bcol;  \
226       ap[bs2*_i + bs*cidx + ridx] = value;  \
227       b_noinsert:; \
228     bilen[brow] = nrow; \
229 }
230 #endif
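/*
   Both macros above implement the same insertion algorithm on a row whose
   block-column indices are kept sorted: a bounded binary search narrows the
   window to at most three candidates, a short linear scan finds the exact
   slot, and on a miss the tail of the row is shifted up by one block to make
   room. A stand-alone sketch of that search and shift, using hypothetical
   names and scalar entries in place of bs*bs blocks (the macros additionally
   reallocate when the row has no spare capacity):
*/
#if 0 /* illustrative sketch only; never compiled */
static void InsertIntoSortedRow(PetscInt *cols,PetscScalar *vals,PetscInt *nrow,PetscInt bcol,PetscScalar value,InsertMode addv)
{
  PetscInt low = 0,high = *nrow,t,i;
  while (high-low > 3) {             /* bounded binary search */
    t = (low+high)/2;
    if (cols[t] > bcol) high = t;
    else                low  = t;
  }
  for (i=low; i<high; i++) {         /* finish with a linear scan */
    if (cols[i] > bcol) break;
    if (cols[i] == bcol) {           /* hit: add to or overwrite the entry */
      if (addv == ADD_VALUES) vals[i] += value;
      else                    vals[i]  = value;
      return;
    }
  }
  for (t=*nrow; t>i; t--) {          /* miss: open slot i by shifting the tail up */
    cols[t] = cols[t-1];
    vals[t] = vals[t-1];
  }
  cols[i] = bcol;
  vals[i] = value;
  (*nrow)++;
}
#endif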
231 
232 #if defined(PETSC_USE_MAT_SINGLE)
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValues_MPISBAIJ"
235 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
238   PetscErrorCode ierr;
239   PetscInt       i,N = m*n;
240   MatScalar      *vsingle;
241 
242   PetscFunctionBegin;
243   if (N > b->setvalueslen) {
244     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
245     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
246     b->setvalueslen  = N;
247   }
248   vsingle = b->setvaluescopy;
249 
250   for (i=0; i<N; i++) {
251     vsingle[i] = v[i];
252   }
253   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
254   PetscFunctionReturn(0);
255 }
256 
257 #undef __FUNCT__
258 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
259 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
260 {
261   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
262   PetscErrorCode ierr;
263   PetscInt       i,N = m*n*b->bs2;
264   MatScalar      *vsingle;
265 
266   PetscFunctionBegin;
267   if (N > b->setvalueslen) {
268     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
269     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
270     b->setvalueslen  = N;
271   }
272   vsingle = b->setvaluescopy;
273   for (i=0; i<N; i++) {
274     vsingle[i] = v[i];
275   }
276   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
277   PetscFunctionReturn(0);
278 }
279 #endif
280 
281 /* Only add/insert a(i,j) with i<=j (blocks).
282    Any a(i,j) with i>j input by the user is ignored (see the usage sketch after this function).
283 */
284 #undef __FUNCT__
285 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
286 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
287 {
288   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
289   MatScalar      value;
290   PetscTruth     roworiented = baij->roworiented;
291   PetscErrorCode ierr;
292   PetscInt       i,j,row,col;
293   PetscInt       rstart_orig=baij->rstart_bs;
294   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
295   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
296 
297   /* Some variables required by the macros */
298   Mat            A = baij->A;
299   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
300   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
301   MatScalar      *aa=a->a;
302 
303   Mat            B = baij->B;
304   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
305   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
306   MatScalar     *ba=b->a;
307 
308   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
309   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
310   MatScalar     *ap,*bap;
311 
312   /* for stash */
313   PetscInt      n_loc, *in_loc=0;
314   MatScalar     *v_loc=0;
315 
316   PetscFunctionBegin;
317 
318   if(!baij->donotstash){
319     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
320     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
321   }
322 
323   for (i=0; i<m; i++) {
324     if (im[i] < 0) continue;
325 #if defined(PETSC_USE_DEBUG)
326     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
327 #endif
328     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
329       row = im[i] - rstart_orig;              /* local row index */
330       for (j=0; j<n; j++) {
331         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
332         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
333           col = in[j] - cstart_orig;          /* local col index */
334           brow = row/bs; bcol = col/bs;
335           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
336           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
337           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
338           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
339         } else if (in[j] < 0) continue;
340 #if defined(PETSC_USE_DEBUG)
341         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
342 #endif
343         else {  /* off-diag entry (B) */
344           if (mat->was_assembled) {
345             if (!baij->colmap) {
346               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
347             }
348 #if defined (PETSC_USE_CTABLE)
349             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
350             col  = col - 1;
351 #else
352             col = baij->colmap[in[j]/bs] - 1;
353 #endif
354             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
355               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
356               col =  in[j];
357               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
358               B = baij->B;
359               b = (Mat_SeqBAIJ*)(B)->data;
360               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
361               ba=b->a;
362             } else col += in[j]%bs;
363           } else col = in[j];
364           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
365           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
366           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
367         }
368       }
369     } else {  /* off processor entry */
370       if (!baij->donotstash) {
371         n_loc = 0;
372         for (j=0; j<n; j++){
373           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
374           in_loc[n_loc] = in[j];
375           if (roworiented) {
376             v_loc[n_loc] = v[i*n+j];
377           } else {
378             v_loc[n_loc] = v[j*m+i];
379           }
380           n_loc++;
381         }
382         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
383       }
384     }
385   }
386 
387   if(!baij->donotstash){
388     ierr = PetscFree(in_loc);CHKERRQ(ierr);
389     ierr = PetscFree(v_loc);CHKERRQ(ierr);
390   }
391   PetscFunctionReturn(0);
392 }
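/*
   Usage sketch (hypothetical, not part of the library): because of the rule
   above, a caller may pass a full symmetric stencil and rely on the lower
   triangle being dropped. For a 2x2 symmetric matrix [[4,1],[1,3]] with bs=1:

     PetscInt    rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {4.0,1.0,1.0,3.0};
     ierr = MatSetValues(sbaij,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   Only a(0,0), a(0,1) and a(1,1) are stored; the redundant a(1,0)=1.0 is
   silently ignored rather than reported as an error.
*/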
393 
394 #undef __FUNCT__
395 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
396 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
397 {
398   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
399   const MatScalar *value;
400   MatScalar       *barray=baij->barray;
401   PetscTruth      roworiented = baij->roworiented;
402   PetscErrorCode  ierr;
403   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
404   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
405   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
406 
407   PetscFunctionBegin;
408   if(!barray) {
409     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
410     baij->barray = barray;
411   }
412 
413   if (roworiented) {
414     stepval = (n-1)*bs;
415   } else {
416     stepval = (m-1)*bs;
417   }
418   for (i=0; i<m; i++) {
419     if (im[i] < 0) continue;
420 #if defined(PETSC_USE_DEBUG)
421     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
422 #endif
423     if (im[i] >= rstart && im[i] < rend) {
424       row = im[i] - rstart;
425       for (j=0; j<n; j++) {
426         /* If n == 1 (row-oriented) or m == 1 (column-oriented), the block is already contiguous and no copy is required */
427         if ((roworiented) && (n == 1)) {
428           barray = (MatScalar*) v + i*bs2;
429         } else if((!roworiented) && (m == 1)) {
430           barray = (MatScalar*) v + j*bs2;
431         } else { /* Here a copy is required */
432           if (roworiented) {
433             value = v + i*(stepval+bs)*bs + j*bs;
434           } else {
435             value = v + j*(stepval+bs)*bs + i*bs;
436           }
437           for (ii=0; ii<bs; ii++,value+=stepval) {
438             for (jj=0; jj<bs; jj++) {
439               *barray++  = *value++;
440             }
441           }
442           barray -=bs2;
443         }
444 
445         if (in[j] >= cstart && in[j] < cend){
446           col  = in[j] - cstart;
447           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
448         }
449         else if (in[j] < 0) continue;
450 #if defined(PETSC_USE_DEBUG)
451         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
452 #endif
453         else {
454           if (mat->was_assembled) {
455             if (!baij->colmap) {
456               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
457             }
458 
459 #if defined(PETSC_USE_DEBUG)
460 #if defined (PETSC_USE_CTABLE)
461             { PetscInt data;
462               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
463               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
464             }
465 #else
466             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
467 #endif
468 #endif
469 #if defined (PETSC_USE_CTABLE)
470             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
471             col  = (col - 1)/bs;
472 #else
473             col = (baij->colmap[in[j]] - 1)/bs;
474 #endif
475             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
476               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
477               col =  in[j];
478             }
479           }
480           else col = in[j];
481           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
482         }
483       }
484     } else {
485       if (!baij->donotstash) {
486         if (roworiented) {
487           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
488         } else {
489           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
490         }
491       }
492     }
493   }
494   PetscFunctionReturn(0);
495 }
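/*
   Worked example of the block staging above (hypothetical values): with bs=2,
   row-oriented input and m=n=2, stepval=(n-1)*bs=2 and block (i,j)=(0,1)
   starts at v+j*bs = v+2. The copy loop picks up v[2],v[3], then skips
   stepval entries and picks up v[6],v[7], so barray receives the 2x2 block
   in the same row-by-row order in which the caller supplied it.
*/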
496 
497 #undef __FUNCT__
498 #define __FUNCT__ "MatGetValues_MPISBAIJ"
499 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
500 {
501   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
502   PetscErrorCode ierr;
503   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
504   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
505 
506   PetscFunctionBegin;
507   for (i=0; i<m; i++) {
508     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
509     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
510     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
511       row = idxm[i] - bsrstart;
512       for (j=0; j<n; j++) {
513         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
514         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
515         if (idxn[j] >= bscstart && idxn[j] < bscend){
516           col = idxn[j] - bscstart;
517           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
518         } else {
519           if (!baij->colmap) {
520             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
521           }
522 #if defined (PETSC_USE_CTABLE)
523           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
524           data --;
525 #else
526           data = baij->colmap[idxn[j]/bs]-1;
527 #endif
528           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
529           else {
530             col  = data + idxn[j]%bs;
531             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
532           }
533         }
534       }
535     } else {
536       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
537     }
538   }
539   PetscFunctionReturn(0);
540 }
541 
542 #undef __FUNCT__
543 #define __FUNCT__ "MatNorm_MPISBAIJ"
544 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
545 {
546   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
547   PetscErrorCode ierr;
548   PetscReal      sum[2],*lnorm2;
549 
550   PetscFunctionBegin;
551   if (baij->size == 1) {
552     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
553   } else {
554     if (type == NORM_FROBENIUS) {
555       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
556       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
557       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
558       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
559       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
560       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
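      /* MatNorm() on the SBAIJ block A already accounts for its implicitly
         stored lower triangle, but each entry of the BAIJ block B stands for
         two entries of the full symmetric matrix: hence the factor of 2 */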
561       *norm = sqrt(sum[0] + 2*sum[1]);
562       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
563     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
564       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
565       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
566       PetscReal    *rsum,*rsum2,vabs;
567       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
568       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
569       MatScalar    *v;
570 
571       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
572       rsum2 = rsum + mat->N;
573       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
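      /* For a symmetric matrix the 1-norm and infinity-norm coincide, so one
         vector of column sums suffices; every stored off-diagonal entry is
         added to both its column sum and its (implicit transpose) row sum */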
574       /* Amat */
575       v = amat->a; jj = amat->j;
576       for (brow=0; brow<mbs; brow++) {
577         grow = bs*(rstart + brow);
578         nz = amat->i[brow+1] - amat->i[brow];
579         for (bcol=0; bcol<nz; bcol++){
580           gcol = bs*(rstart + *jj); jj++;
581           for (col=0; col<bs; col++){
582             for (row=0; row<bs; row++){
583               vabs = PetscAbsScalar(*v); v++;
584               rsum[gcol+col] += vabs;
585               /* non-diagonal block */
586               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
587             }
588           }
589         }
590       }
591       /* Bmat */
592       v = bmat->a; jj = bmat->j;
593       for (brow=0; brow<mbs; brow++) {
594         grow = bs*(rstart + brow);
595         nz = bmat->i[brow+1] - bmat->i[brow];
596         for (bcol=0; bcol<nz; bcol++){
597           gcol = bs*garray[*jj]; jj++;
598           for (col=0; col<bs; col++){
599             for (row=0; row<bs; row++){
600               vabs = PetscAbsScalar(*v); v++;
601               rsum[gcol+col] += vabs;
602               rsum[grow+row] += vabs;
603             }
604           }
605         }
606       }
607       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
608       *norm = 0.0;
609       for (col=0; col<mat->N; col++) {
610         if (rsum2[col] > *norm) *norm = rsum2[col];
611       }
612       ierr = PetscFree(rsum);CHKERRQ(ierr);
613     } else {
614       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
615     }
616   }
617   PetscFunctionReturn(0);
618 }
619 
620 #undef __FUNCT__
621 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
622 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
623 {
624   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
625   PetscErrorCode ierr;
626   PetscInt       nstash,reallocs;
627   InsertMode     addv;
628 
629   PetscFunctionBegin;
630   if (baij->donotstash) {
631     PetscFunctionReturn(0);
632   }
633 
634   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
635   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
636   if (addv == (ADD_VALUES|INSERT_VALUES)) {
637     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
638   }
639   mat->insertmode = addv; /* in case this processor had no cache */
640 
641   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
642   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
643   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
644   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
645   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
646   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
647   PetscFunctionReturn(0);
648 }
649 
650 #undef __FUNCT__
651 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
652 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
653 {
654   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
655   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
656   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
657   PetscErrorCode ierr;
658   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
659   PetscInt       *row,*col,other_disassembled;
660   PetscMPIInt    n;
661   PetscTruth     r1,r2,r3;
662   MatScalar      *val;
663   InsertMode     addv = mat->insertmode;
664 
665   PetscFunctionBegin;
666 
667   if (!baij->donotstash) {
668     while (1) {
669       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
670       if (!flg) break;
671 
672       for (i=0; i<n;) {
673         /* Now identify the consecutive vals belonging to the same row */
674         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
675         if (j < n) ncols = j-i;
676         else       ncols = n-i;
677         /* Now assemble all these values with a single function call */
678         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
679         i = j;
680       }
681     }
682     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
683     /* Now process the block-stash. Since the values are stashed column-oriented,
684        clear the roworiented flags for the duration of the MatSetValues() calls,
685        then restore the original flags */
686     r1 = baij->roworiented;
687     r2 = a->roworiented;
688     r3 = b->roworiented;
689     baij->roworiented = PETSC_FALSE;
690     a->roworiented    = PETSC_FALSE;
691     b->roworiented    = PETSC_FALSE;
692     while (1) {
693       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
694       if (!flg) break;
695 
696       for (i=0; i<n;) {
697         /* Now identify the consecutive vals belonging to the same row */
698         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
699         if (j < n) ncols = j-i;
700         else       ncols = n-i;
701         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
702         i = j;
703       }
704     }
705     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
706     baij->roworiented = r1;
707     a->roworiented    = r2;
708     b->roworiented    = r3;
709   }
710 
711   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
712   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
713 
714   /* determine if any processor has disassembled; if so we must
715      also disassemble ourselves, so that we may reassemble. */
716   /*
717      if nonzero structure of submatrix B cannot change then we know that
718      no processor disassembled thus we can skip this stuff
719   */
720   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
721     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
722     if (mat->was_assembled && !other_disassembled) {
723       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
724     }
725   }
726 
727   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
728     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
729   }
730   b->compressedrow.use = PETSC_TRUE;
731   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
733 
734   if (baij->rowvalues) {
735     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
736     baij->rowvalues = 0;
737   }
738 
739   PetscFunctionReturn(0);
740 }
741 
742 #undef __FUNCT__
743 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
744 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
745 {
746   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
747   PetscErrorCode    ierr;
748   PetscInt          bs = mat->bs;
749   PetscMPIInt       size = baij->size,rank = baij->rank;
750   PetscTruth        iascii,isdraw;
751   PetscViewer       sviewer;
752   PetscViewerFormat format;
753 
754   PetscFunctionBegin;
755   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
756   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
757   if (iascii) {
758     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
759     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
760       MatInfo info;
761       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
762       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
763       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
764               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
765               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
766       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
767       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
768       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
769       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
770       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
771       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
772       PetscFunctionReturn(0);
773     } else if (format == PETSC_VIEWER_ASCII_INFO) {
774       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
775       PetscFunctionReturn(0);
776     }
777   }
778 
779   if (isdraw) {
780     PetscDraw       draw;
781     PetscTruth isnull;
782     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
783     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
784   }
785 
786   if (size == 1) {
787     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
788     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
789   } else {
790     /* assemble the entire matrix onto first processor. */
791     Mat         A;
792     Mat_SeqSBAIJ *Aloc;
793     Mat_SeqBAIJ *Bloc;
794     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
795     MatScalar   *a;
796 
797     /* Should this be the same type as mat? */
798     if (!rank) {
799       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
800     } else {
801       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
802     }
803     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
804     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
805     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
806 
807     /* copy over the A part */
808     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
809     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
810     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
811 
812     for (i=0; i<mbs; i++) {
813       rvals[0] = bs*(baij->rstart + i);
814       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
815       for (j=ai[i]; j<ai[i+1]; j++) {
816         col = (baij->cstart+aj[j])*bs;
817         for (k=0; k<bs; k++) {
818           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
819           col++; a += bs;
820         }
821       }
822     }
823     /* copy over the B part */
824     Bloc = (Mat_SeqBAIJ*)baij->B->data;
825     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
826     for (i=0; i<mbs; i++) {
827       rvals[0] = bs*(baij->rstart + i);
828       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
829       for (j=ai[i]; j<ai[i+1]; j++) {
830         col = baij->garray[aj[j]]*bs;
831         for (k=0; k<bs; k++) {
832           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
833           col++; a += bs;
834         }
835       }
836     }
837     ierr = PetscFree(rvals);CHKERRQ(ierr);
838     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
840     /*
841        Everyone has to call to draw the matrix since the graphics waits are
842        synchronized across all processors that share the PetscDraw object
843     */
844     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
845     if (!rank) {
846       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
847       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
848     }
849     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
850     ierr = MatDestroy(A);CHKERRQ(ierr);
851   }
852   PetscFunctionReturn(0);
853 }
854 
855 #undef __FUNCT__
856 #define __FUNCT__ "MatView_MPISBAIJ"
857 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
858 {
859   PetscErrorCode ierr;
860   PetscTruth     iascii,isdraw,issocket,isbinary;
861 
862   PetscFunctionBegin;
863   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
864   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
865   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
866   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
867   if (iascii || isdraw || issocket || isbinary) {
868     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
869   } else {
870     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
871   }
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatDestroy_MPISBAIJ"
877 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
878 {
879   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
880   PetscErrorCode ierr;
881 
882   PetscFunctionBegin;
883 #if defined(PETSC_USE_LOG)
884   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
885 #endif
886   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
887   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
888   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
889   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
890   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
891 #if defined (PETSC_USE_CTABLE)
892   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
893 #else
894   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
895 #endif
896   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
897   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
898   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
899   if (baij->slvec0) {
900     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
901     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
902   }
903   if (baij->slvec1) {
904     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
905     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
906     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
907   }
908   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
909   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
910   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
911   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
912 #if defined(PETSC_USE_MAT_SINGLE)
913   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
914 #endif
915   ierr = PetscFree(baij);CHKERRQ(ierr);
916 
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
920   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
921   PetscFunctionReturn(0);
922 }
923 
924 #undef __FUNCT__
925 #define __FUNCT__ "MatMult_MPISBAIJ"
926 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
927 {
928   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
929   PetscErrorCode ierr;
930   PetscInt       nt,mbs=a->mbs,bs=A->bs;
931   PetscScalar    *x,*from,zero=0.0;
932 
933   PetscFunctionBegin;
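  /* With only the upper triangle stored, y = A*x is assembled in pieces:
       slvec1a = A_diag*x    local symmetric diagonal block
       slvec0b = B^T*x       contribution of the implicitly stored lower triangle
     A single scatter of slvec0 = [x, B^T*x] then delivers ghost values of x into
     slvec1b and accumulates the B^T*x terms into slvec1a on their owner processes,
     after which y = B*slvec1b + slvec1a completes the product. */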
934   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
935   if (nt != A->n) {
936     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
937   }
938   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
939   if (nt != A->m) {
940     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
941   }
942 
943   /* diagonal part */
944   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
945   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
946 
947   /* subdiagonal part */
948   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
949 
950   /* copy x into the vec slvec0 */
951   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
952   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
953   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
954   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
955 
956   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
957   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
958   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
959 
960   /* superdiagonal part */
961   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
962 
963   PetscFunctionReturn(0);
964 }
965 
966 #undef __FUNCT__
967 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
968 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
969 {
970   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
971   PetscErrorCode ierr;
972   PetscInt       nt;
973 
974   PetscFunctionBegin;
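  /* Two-scatter variant of MatMult_MPISBAIJ: yy = A_diag*xx + B*lvec is formed
     with a forward scatter of xx, then the implicit lower-triangle term B^T*xx
     is computed locally and accumulated into yy by a reverse ADD_VALUES scatter. */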
975   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
976   if (nt != A->n) {
977     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
978   }
979   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
980   if (nt != A->m) {
981     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
982   }
983 
984   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
985   /* do diagonal part */
986   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
987   /* do superdiagonal part */
988   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
989   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
990   /* do subdiagonal part */
991   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
992   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
993   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
994 
995   PetscFunctionReturn(0);
996 }
997 
998 #undef __FUNCT__
999 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1000 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1001 {
1002   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1003   PetscErrorCode ierr;
1004   PetscInt       mbs=a->mbs,bs=A->bs;
1005   PetscScalar    *x,*from,zero=0.0;
1006 
1007   PetscFunctionBegin;
1008   /*
1009   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1010   PetscSynchronizedFlush(A->comm);
1011   */
1012   /* diagonal part */
1013   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1014   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1015 
1016   /* subdiagonal part */
1017   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1018 
1019   /* copy x into the vec slvec0 */
1020   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1021   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1022   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1023   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1024 
1025   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1026   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1027   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1028 
1029   /* superdiagonal part */
1030   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1031 
1032   PetscFunctionReturn(0);
1033 }
1034 
1035 #undef __FUNCT__
1036 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1037 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1038 {
1039   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1040   PetscErrorCode ierr;
1041 
1042   PetscFunctionBegin;
1043   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1044   /* do diagonal part */
1045   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1046   /* do superdiagonal part */
1047   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1048   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1049 
1050   /* do subdiagonal part */
1051   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1052   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1053   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1054 
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1060 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1061 {
1062   PetscErrorCode ierr;
1063 
1064   PetscFunctionBegin;
1065   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1066   PetscFunctionReturn(0);
1067 }
1068 
1069 #undef __FUNCT__
1070 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1071 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1072 {
1073   PetscErrorCode ierr;
1074 
1075   PetscFunctionBegin;
1076   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1077   PetscFunctionReturn(0);
1078 }
1079 
1080 /*
1081   This only works correctly for square matrices where the subblock A->A is the
1082    diagonal block
1083 */
1084 #undef __FUNCT__
1085 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1086 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1087 {
1088   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1089   PetscErrorCode ierr;
1090 
1091   PetscFunctionBegin;
1092   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1093   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1094   PetscFunctionReturn(0);
1095 }
1096 
1097 #undef __FUNCT__
1098 #define __FUNCT__ "MatScale_MPISBAIJ"
1099 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1100 {
1101   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1102   PetscErrorCode ierr;
1103 
1104   PetscFunctionBegin;
1105   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1106   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1107   PetscFunctionReturn(0);
1108 }
1109 
1110 #undef __FUNCT__
1111 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1112 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1113 {
1114   PetscFunctionBegin;
1115   SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1121 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1122 {
1123   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1124 
1125   PetscFunctionBegin;
1126   if (!baij->getrowactive) {
1127     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1128   }
1129   baij->getrowactive = PETSC_FALSE;
1130   PetscFunctionReturn(0);
1131 }
1132 
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1135 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1136 {
1137   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1138   PetscErrorCode ierr;
1139 
1140   PetscFunctionBegin;
1141   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1142   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1143   PetscFunctionReturn(0);
1144 }
1145 
1146 #undef __FUNCT__
1147 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1148 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1149 {
1150   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1151   Mat            A = a->A,B = a->B;
1152   PetscErrorCode ierr;
1153   PetscReal      isend[5],irecv[5];
1154 
1155   PetscFunctionBegin;
1156   info->block_size     = (PetscReal)matin->bs;
1157   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1158   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1159   isend[3] = info->memory;  isend[4] = info->mallocs;
1160   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1161   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1162   isend[3] += info->memory;  isend[4] += info->mallocs;
1163   if (flag == MAT_LOCAL) {
1164     info->nz_used      = isend[0];
1165     info->nz_allocated = isend[1];
1166     info->nz_unneeded  = isend[2];
1167     info->memory       = isend[3];
1168     info->mallocs      = isend[4];
1169   } else if (flag == MAT_GLOBAL_MAX) {
1170     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1171     info->nz_used      = irecv[0];
1172     info->nz_allocated = irecv[1];
1173     info->nz_unneeded  = irecv[2];
1174     info->memory       = irecv[3];
1175     info->mallocs      = irecv[4];
1176   } else if (flag == MAT_GLOBAL_SUM) {
1177     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1178     info->nz_used      = irecv[0];
1179     info->nz_allocated = irecv[1];
1180     info->nz_unneeded  = irecv[2];
1181     info->memory       = irecv[3];
1182     info->mallocs      = irecv[4];
1183   } else {
1184     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1185   }
1186   info->rows_global       = (PetscReal)A->M;
1187   info->columns_global    = (PetscReal)A->N;
1188   info->rows_local        = (PetscReal)A->m;
1189   info->columns_local     = (PetscReal)A->N;
1190   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1191   info->fill_ratio_needed = 0;
1192   info->factor_mallocs    = 0;
1193   PetscFunctionReturn(0);
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1198 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1199 {
1200   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1201   PetscErrorCode ierr;
1202 
1203   PetscFunctionBegin;
1204   switch (op) {
1205   case MAT_NO_NEW_NONZERO_LOCATIONS:
1206   case MAT_YES_NEW_NONZERO_LOCATIONS:
1207   case MAT_COLUMNS_UNSORTED:
1208   case MAT_COLUMNS_SORTED:
1209   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1210   case MAT_KEEP_ZEROED_ROWS:
1211   case MAT_NEW_NONZERO_LOCATION_ERR:
1212     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1213     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1214     break;
1215   case MAT_ROW_ORIENTED:
1216     a->roworiented = PETSC_TRUE;
1217     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1218     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1219     break;
1220   case MAT_ROWS_SORTED:
1221   case MAT_ROWS_UNSORTED:
1222   case MAT_YES_NEW_DIAGONALS:
1223     ierr = PetscLogInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
1224     break;
1225   case MAT_COLUMN_ORIENTED:
1226     a->roworiented = PETSC_FALSE;
1227     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1228     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1229     break;
1230   case MAT_IGNORE_OFF_PROC_ENTRIES:
1231     a->donotstash = PETSC_TRUE;
1232     break;
1233   case MAT_NO_NEW_DIAGONALS:
1234     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1235   case MAT_USE_HASH_TABLE:
1236     a->ht_flag = PETSC_TRUE;
1237     break;
1238   case MAT_NOT_SYMMETRIC:
1239   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1240   case MAT_HERMITIAN:
1241     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1242   case MAT_SYMMETRIC:
1243   case MAT_STRUCTURALLY_SYMMETRIC:
1244   case MAT_NOT_HERMITIAN:
1245   case MAT_SYMMETRY_ETERNAL:
1246   case MAT_NOT_SYMMETRY_ETERNAL:
1247     break;
1248   default:
1249     SETERRQ(PETSC_ERR_SUP,"unknown option");
1250   }
1251   PetscFunctionReturn(0);
1252 }
1253 
1254 #undef __FUNCT__
1255 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1256 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1257 {
1258   PetscErrorCode ierr;
1259   PetscFunctionBegin;
1260   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 #undef __FUNCT__
1265 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1266 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1267 {
1268   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1269   Mat            a=baij->A, b=baij->B;
1270   PetscErrorCode ierr;
1271   PetscInt       nv,m,n;
1272   PetscTruth     flg;
1273 
1274   PetscFunctionBegin;
1275   if (ll != rr){
1276     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1277     if (!flg)
1278       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
1279   }
1280   if (!ll) PetscFunctionReturn(0);
1281 
1282   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1283   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);
1284 
1285   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1286   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local sizes");
1287 
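  /* Symmetric scaling D*A*D: the diagonal block is scaled on both sides with
     the local part of D, while the off-diagonal block B is scaled by D on the
     left here and by the scattered ghost entries of D on the right below */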
1288   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1289 
1290   /* left diagonalscale the off-diagonal part */
1291   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1292 
1293   /* scale the diagonal part */
1294   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1295 
1296   /* right diagonalscale the off-diagonal part */
1297   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1298   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1304 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1305 {
1306   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1307   MPI_Comm          comm = A->comm;
1308   static PetscTruth called = PETSC_FALSE;
1309   PetscErrorCode    ierr;
1310 
1311   PetscFunctionBegin;
1312   if (!a->rank) {
1313     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1314   }
1315   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1316   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1317   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1318   PetscFunctionReturn(0);
1319 }
1320 
1321 #undef __FUNCT__
1322 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1323 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1324 {
1325   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1326   PetscErrorCode ierr;
1327 
1328   PetscFunctionBegin;
1329   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1330   PetscFunctionReturn(0);
1331 }
1332 
1333 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1334 
1335 #undef __FUNCT__
1336 #define __FUNCT__ "MatEqual_MPISBAIJ"
1337 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1338 {
1339   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1340   Mat            a,b,c,d;
1341   PetscTruth     flg;
1342   PetscErrorCode ierr;
1343 
1344   PetscFunctionBegin;
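       /* the parallel matrices are equal only if both the diagonal (A) and off-diagonal (B)
          local parts agree on every process, hence the logical-AND reduction below */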
1345   a = matA->A; b = matA->B;
1346   c = matB->A; d = matB->B;
1347 
1348   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1349   if (flg) {
1350     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1351   }
1352   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1358 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1359 {
1360   PetscErrorCode ierr;
1361 
1362   PetscFunctionBegin;
1363   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1369 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1370 {
1371   PetscErrorCode ierr;
1372   PetscInt       i;
1373   PetscTruth     flg;
1374 
1375   PetscFunctionBegin;
1376   for (i=0; i<n; i++) {
1377     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1378     if (!flg) {
1379       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1380     }
1381   }
1382   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1383   PetscFunctionReturn(0);
1384 }
1385 
1386 
1387 /* -------------------------------------------------------------------*/
1388 static struct _MatOps MatOps_Values = {
1389        MatSetValues_MPISBAIJ,
1390        MatGetRow_MPISBAIJ,
1391        MatRestoreRow_MPISBAIJ,
1392        MatMult_MPISBAIJ,
1393 /* 4*/ MatMultAdd_MPISBAIJ,
1394        MatMultTranspose_MPISBAIJ,
1395        MatMultTransposeAdd_MPISBAIJ,
1396        0,
1397        0,
1398        0,
1399 /*10*/ 0,
1400        0,
1401        0,
1402        MatRelax_MPISBAIJ,
1403        MatTranspose_MPISBAIJ,
1404 /*15*/ MatGetInfo_MPISBAIJ,
1405        MatEqual_MPISBAIJ,
1406        MatGetDiagonal_MPISBAIJ,
1407        MatDiagonalScale_MPISBAIJ,
1408        MatNorm_MPISBAIJ,
1409 /*20*/ MatAssemblyBegin_MPISBAIJ,
1410        MatAssemblyEnd_MPISBAIJ,
1411        0,
1412        MatSetOption_MPISBAIJ,
1413        MatZeroEntries_MPISBAIJ,
1414 /*25*/ 0,
1415        0,
1416        0,
1417        0,
1418        0,
1419 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1420        0,
1421        0,
1422        0,
1423        0,
1424 /*35*/ MatDuplicate_MPISBAIJ,
1425        0,
1426        0,
1427        0,
1428        0,
1429 /*40*/ 0,
1430        MatGetSubMatrices_MPISBAIJ,
1431        MatIncreaseOverlap_MPISBAIJ,
1432        MatGetValues_MPISBAIJ,
1433        0,
1434 /*45*/ MatPrintHelp_MPISBAIJ,
1435        MatScale_MPISBAIJ,
1436        0,
1437        0,
1438        0,
1439 /*50*/ 0,
1440        0,
1441        0,
1442        0,
1443        0,
1444 /*55*/ 0,
1445        0,
1446        MatSetUnfactored_MPISBAIJ,
1447        0,
1448        MatSetValuesBlocked_MPISBAIJ,
1449 /*60*/ 0,
1450        0,
1451        0,
1452        MatGetPetscMaps_Petsc,
1453        0,
1454 /*65*/ 0,
1455        0,
1456        0,
1457        0,
1458        0,
1459 /*70*/ MatGetRowMax_MPISBAIJ,
1460        0,
1461        0,
1462        0,
1463        0,
1464 /*75*/ 0,
1465        0,
1466        0,
1467        0,
1468        0,
1469 /*80*/ 0,
1470        0,
1471        0,
1472        0,
1473        MatLoad_MPISBAIJ,
1474 /*85*/ 0,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*90*/ 0,
1480        0,
1481        0,
1482        0,
1483        0,
1484 /*95*/ 0,
1485        0,
1486        0,
1487        0};
1488 
1489 
1490 EXTERN_C_BEGIN
1491 #undef __FUNCT__
1492 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1493 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1494 {
1495   PetscFunctionBegin;
1496   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1497   *iscopy = PETSC_FALSE;
1498   PetscFunctionReturn(0);
1499 }
1500 EXTERN_C_END
1501 
1502 EXTERN_C_BEGIN
1503 #undef __FUNCT__
1504 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1505 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1506 {
1507   Mat_MPISBAIJ   *b;
1508   PetscErrorCode ierr;
1509   PetscInt       i,mbs,Mbs;
1510 
1511   PetscFunctionBegin;
1512   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1513 
1514   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1515   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1516   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1517   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1518   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1519   if (d_nnz) {
1520     for (i=0; i<B->m/bs; i++) {
1521       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1522     }
1523   }
1524   if (o_nnz) {
1525     for (i=0; i<B->m/bs; i++) {
1526       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1527     }
1528   }
1529   B->preallocated = PETSC_TRUE;
1530   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1531   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1532   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1533   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1534 
1535   b   = (Mat_MPISBAIJ*)B->data;
1536   mbs = B->m/bs;
1537   Mbs = B->M/bs;
1538   if (mbs*bs != B->m) {
1539     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by block size %D",B->m,bs);
1540   }
1541 
1542   B->bs  = bs;
1543   b->bs2 = bs*bs;
1544   b->mbs = mbs;
1545   b->nbs = mbs;
1546   b->Mbs = Mbs;
1547   b->Nbs = Mbs;
1548 
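       /* gather each process's block-row count, then prefix-sum it so that block rows
          rowners[p] .. rowners[p+1]-1 are owned by process p */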
1549   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1550   b->rowners[0]    = 0;
1551   for (i=2; i<=b->size; i++) {
1552     b->rowners[i] += b->rowners[i-1];
1553   }
1554   b->rstart    = b->rowners[b->rank];
1555   b->rend      = b->rowners[b->rank+1];
1556   b->cstart    = b->rstart;
1557   b->cend      = b->rend;
1558   for (i=0; i<=b->size; i++) {
1559     b->rowners_bs[i] = b->rowners[i]*bs;
1560   }
1561   b->rstart_bs = b->rstart*bs;
1562   b->rend_bs   = b->rend*bs;
1563 
1564   b->cstart_bs = b->cstart*bs;
1565   b->cend_bs   = b->cend*bs;
1566 
1567   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1568   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1569   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1570   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1571 
1572   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1573   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1574   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1575   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1576 
1577   /* build cache for off array entries formed */
1578   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1579 
1580   PetscFunctionReturn(0);
1581 }
1582 EXTERN_C_END
1583 
1584 /*MC
1585    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1586    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1587 
1588    Options Database Keys:
1589 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
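
        Example (an illustrative sketch; the communicator and sizes are placeholders):
     .vb
          ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
          ierr = MatSetFromOptions(A);CHKERRQ(ierr);
     .ve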
1590 
1591   Level: beginner
1592 
1593 .seealso: MatCreateMPISBAIJ
1594 M*/
1595 
1596 EXTERN_C_BEGIN
1597 #undef __FUNCT__
1598 #define __FUNCT__ "MatCreate_MPISBAIJ"
1599 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1600 {
1601   Mat_MPISBAIJ   *b;
1602   PetscErrorCode ierr;
1603   PetscTruth     flg;
1604 
1605   PetscFunctionBegin;
1606 
1607   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1608   B->data = (void*)b;
1609   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1610 
1611   B->ops->destroy    = MatDestroy_MPISBAIJ;
1612   B->ops->view       = MatView_MPISBAIJ;
1613   B->mapping    = 0;
1614   B->factor     = 0;
1615   B->assembled  = PETSC_FALSE;
1616 
1617   B->insertmode = NOT_SET_VALUES;
1618   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1619   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1620 
1621   /* build local table of row and column ownerships */
1622   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1623   b->cowners    = b->rowners + b->size + 2;
1624   b->rowners_bs = b->cowners + b->size + 2;
1625   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1626 
1627   /* build cache for off array entries formed */
1628   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1629   b->donotstash  = PETSC_FALSE;
1630   b->colmap      = PETSC_NULL;
1631   b->garray      = PETSC_NULL;
1632   b->roworiented = PETSC_TRUE;
1633 
1634 #if defined(PETSC_USE_MAT_SINGLE)
1635   /* stuff for MatSetValues_XXX in single precision */
1636   b->setvalueslen     = 0;
1637   b->setvaluescopy    = PETSC_NULL;
1638 #endif
1639 
1640   /* stuff used in block assembly */
1641   b->barray       = 0;
1642 
1643   /* stuff used for matrix vector multiply */
1644   b->lvec         = 0;
1645   b->Mvctx        = 0;
1646   b->slvec0       = 0;
1647   b->slvec0b      = 0;
1648   b->slvec1       = 0;
1649   b->slvec1a      = 0;
1650   b->slvec1b      = 0;
1651   b->sMvctx       = 0;
1652 
1653   /* stuff for MatGetRow() */
1654   b->rowindices   = 0;
1655   b->rowvalues    = 0;
1656   b->getrowactive = PETSC_FALSE;
1657 
1658   /* hash table stuff */
1659   b->ht           = 0;
1660   b->hd           = 0;
1661   b->ht_size      = 0;
1662   b->ht_flag      = PETSC_FALSE;
1663   b->ht_fact      = 0;
1664   b->ht_total_ct  = 0;
1665   b->ht_insert_ct = 0;
1666 
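       /* optional hash-table-based assembly, enabled with -mat_use_hash_table [factor];
          the factor (> 1.0) roughly oversizes the table relative to the expected number
          of stored blocks (descriptive note) */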
1667   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1668   if (flg) {
1669     PetscReal fact = 1.39;
1670     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1671     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1672     if (fact <= 1.0) fact = 1.39;
1673     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1674     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1675   }
1676   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1677                                      "MatStoreValues_MPISBAIJ",
1678                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1679   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1680                                      "MatRetrieveValues_MPISBAIJ",
1681                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1682   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1683                                      "MatGetDiagonalBlock_MPISBAIJ",
1684                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1685   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1686                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1687                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1688   B->symmetric                  = PETSC_TRUE;
1689   B->structurally_symmetric     = PETSC_TRUE;
1690   B->symmetric_set              = PETSC_TRUE;
1691   B->structurally_symmetric_set = PETSC_TRUE;
1692   PetscFunctionReturn(0);
1693 }
1694 EXTERN_C_END
1695 
1696 /*MC
1697    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1698 
1699    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1700    and MATMPISBAIJ otherwise.
1701 
1702    Options Database Keys:
1703 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1704 
1705   Level: beginner
1706 
1707 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1708 M*/
1709 
1710 EXTERN_C_BEGIN
1711 #undef __FUNCT__
1712 #define __FUNCT__ "MatCreate_SBAIJ"
1713 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1714 {
1715   PetscErrorCode ierr;
1716   PetscMPIInt    size;
1717 
1718   PetscFunctionBegin;
1719   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1720   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1721   if (size == 1) {
1722     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1723   } else {
1724     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1725   }
1726   PetscFunctionReturn(0);
1727 }
1728 EXTERN_C_END
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1732 /*@C
1733    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1734    the user should preallocate the matrix storage by setting the parameters
1735    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1736    performance can be increased by more than a factor of 50.
1737 
1738    Collective on Mat
1739 
1740    Input Parameters:
1741 +  A - the matrix
1742 .  bs   - size of block
1743 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1744            submatrix  (same for all local rows)
1745 .  d_nnz - array containing the number of block nonzeros in the various block rows
1746            in the upper triangular and diagonal part of the diagonal portion of the local
1747            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1748            room for the diagonal entry even if it is zero.
1749 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1750            submatrix (same for all local rows).
1751 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1752            off-diagonal portion of the local submatrix (possibly different for
1753            each block row) or PETSC_NULL.
1754 
1755 
1756    Options Database Keys:
1757 +   -mat_no_unroll - uses code that does not unroll the loops in the
1758                      block calculations (much slower)
1759 -   -mat_block_size - size of the blocks to use
1760 
1761    Notes:
1762 
1763    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1764    then it must be used on all processors that share the object for that argument.
1765 
1766    If the *_nnz parameter is given then the *_nz parameter is ignored
1767 
1768    Storage Information:
1769    For a square global matrix we define each processor's diagonal portion
1770    to be its local rows and the corresponding columns (a square submatrix);
1771    each processor's off-diagonal portion encompasses the remainder of the
1772    local matrix (a rectangular submatrix).
1773 
1774    The user can specify preallocated storage for the diagonal part of
1775    the local submatrix with either d_nz or d_nnz (not both).  Set
1776    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1777    memory allocation.  Likewise, specify preallocated storage for the
1778    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1779 
1780    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1781    the figure below we depict these three local rows and all columns (0-11).
1782 
1783 .vb
1784            0 1 2 3 4 5 6 7 8 9 10 11
1785           -------------------
1786    row 3  |  o o o d d d o o o o o o
1787    row 4  |  o o o d d d o o o o o o
1788    row 5  |  o o o d d d o o o o o o
1789           -------------------
1790 .ve
1791 
1792    Thus, any entries in the d locations are stored in the d (diagonal)
1793    submatrix, and any entries in the o locations are stored in the
1794    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1795    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1796 
1797    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1798    plus the diagonal part of the d matrix,
1799    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1800    In general, for PDE problems in which most nonzeros are near the diagonal,
1801    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1802    or you will get TERRIBLE performance; see the users' manual chapter on
1803    matrices.
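
        As a concrete illustration (hypothetical numbers, with bs=1): if every entry
        depicted above were nonzero, only the upper triangle would be stored, so row 3
        has d entries in columns 3-5, row 4 in columns 4-5, row 5 in column 5, and each
        row has o entries in columns 6-11:

     .vb
          PetscInt d_nnz[] = {3,2,1},o_nnz[] = {6,6,6};
          ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
     .ve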
1804 
1805    Level: intermediate
1806 
1807 .keywords: matrix, block, aij, compressed row, sparse, parallel
1808 
1809 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1810 @*/
1811 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1812 {
1813   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1814 
1815   PetscFunctionBegin;
1816   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1817   if (f) {
1818     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1819   }
1820   PetscFunctionReturn(0);
1821 }
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatCreateMPISBAIJ"
1825 /*@C
1826    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1827    (block compressed row).  For good matrix assembly performance
1828    the user should preallocate the matrix storage by setting the parameters
1829    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1830    performance can be increased by more than a factor of 50.
1831 
1832    Collective on MPI_Comm
1833 
1834    Input Parameters:
1835 +  comm - MPI communicator
1836 .  bs   - size of block
1837 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1838            This value should be the same as the local size used in creating the
1839            y vector for the matrix-vector product y = Ax.
1840 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1841            This value should be the same as the local size used in creating the
1842            x vector for the matrix-vector product y = Ax.
1843 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1844 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1845 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1846            submatrix  (same for all local rows)
1847 .  d_nnz - array containing the number of block nonzeros in the various block rows
1848            in the upper triangular portion of the diagonal portion of the local
1849            submatrix (possibly different for each block row) or PETSC_NULL.
1850            You must leave room for the diagonal entry even if it is zero.
1851 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1852            submatrix (same for all local rows).
1853 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1854            off-diagonal portion of the local submatrix (possibly different for
1855            each block row) or PETSC_NULL.
1856 
1857    Output Parameter:
1858 .  A - the matrix
1859 
1860    Options Database Keys:
1861 +   -mat_no_unroll - uses code that does not unroll the loops in the
1862                      block calculations (much slower)
1863 .   -mat_block_size - size of the blocks to use
1864 -   -mat_mpi - use the parallel matrix data structures even on one processor
1865                (defaults to using SeqSBAIJ format on one processor)
1866 
1867    Notes:
1868    The number of rows and columns must be divisible by blocksize.
1869 
1870    The user MUST specify either the local or global matrix dimensions
1871    (possibly both).
1872 
1873    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1874    then it must be used on all processors that share the object for that argument.
1875 
1876    If the *_nnz parameter is given then the *_nz parameter is ignored
1877 
1878    Storage Information:
1879    For a square global matrix we define each processor's diagonal portion
1880    to be its local rows and the corresponding columns (a square submatrix);
1881    each processor's off-diagonal portion encompasses the remainder of the
1882    local matrix (a rectangular submatrix).
1883 
1884    The user can specify preallocated storage for the diagonal part of
1885    the local submatrix with either d_nz or d_nnz (not both).  Set
1886    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1887    memory allocation.  Likewise, specify preallocated storage for the
1888    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1889 
1890    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1891    the figure below we depict these three local rows and all columns (0-11).
1892 
1893 .vb
1894            0 1 2 3 4 5 6 7 8 9 10 11
1895           -------------------
1896    row 3  |  o o o d d d o o o o o o
1897    row 4  |  o o o d d d o o o o o o
1898    row 5  |  o o o d d d o o o o o o
1899           -------------------
1900 .ve
1901 
1902    Thus, any entries in the d locations are stored in the d (diagonal)
1903    submatrix, and any entries in the o locations are stored in the
1904    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1905    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1906 
1907    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1908    plus the diagonal part of the d matrix,
1909    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1910    In general, for PDE problems in which most nonzeros are near the diagonal,
1911    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1912    or you will get TERRIBLE performance; see the users' manual chapter on
1913    matrices.
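
        For example (an illustrative sketch with hypothetical sizes), a matrix with bs=2,
        2000 global rows and columns, and about 5 diagonal and 2 off-diagonal block
        nonzeros per block row could be created with:

     .vb
          Mat A;
          ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,2000,2000,5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
     .ve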
1914 
1915    Level: intermediate
1916 
1917 .keywords: matrix, block, aij, compressed row, sparse, parallel
1918 
1919 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1920 @*/
1921 
1922 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1923 {
1924   PetscErrorCode ierr;
1925   PetscMPIInt    size;
1926 
1927   PetscFunctionBegin;
1928   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1929   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1930   if (size > 1) {
1931     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1932     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1933   } else {
1934     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1935     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1936   }
1937   PetscFunctionReturn(0);
1938 }
1939 
1940 
1941 #undef __FUNCT__
1942 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1943 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1944 {
1945   Mat            mat;
1946   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1947   PetscErrorCode ierr;
1948   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1949   PetscScalar    *array;
1950 
1951   PetscFunctionBegin;
1952   *newmat       = 0;
1953   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1954   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1955   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1956 
1957   mat->factor       = matin->factor;
1958   mat->preallocated = PETSC_TRUE;
1959   mat->assembled    = PETSC_TRUE;
1960   mat->insertmode   = NOT_SET_VALUES;
1961 
1962   a = (Mat_MPISBAIJ*)mat->data;
1963   mat->bs  = matin->bs;
1964   a->bs2   = oldmat->bs2;
1965   a->mbs   = oldmat->mbs;
1966   a->nbs   = oldmat->nbs;
1967   a->Mbs   = oldmat->Mbs;
1968   a->Nbs   = oldmat->Nbs;
1969 
1970   a->rstart       = oldmat->rstart;
1971   a->rend         = oldmat->rend;
1972   a->cstart       = oldmat->cstart;
1973   a->cend         = oldmat->cend;
1974   a->size         = oldmat->size;
1975   a->rank         = oldmat->rank;
1976   a->donotstash   = oldmat->donotstash;
1977   a->roworiented  = oldmat->roworiented;
1978   a->rowindices   = 0;
1979   a->rowvalues    = 0;
1980   a->getrowactive = PETSC_FALSE;
1981   a->barray       = 0;
1982   a->rstart_bs    = oldmat->rstart_bs;
1983   a->rend_bs      = oldmat->rend_bs;
1984   a->cstart_bs    = oldmat->cstart_bs;
1985   a->cend_bs      = oldmat->cend_bs;
1986 
1987   /* hash table stuff */
1988   a->ht           = 0;
1989   a->hd           = 0;
1990   a->ht_size      = 0;
1991   a->ht_flag      = oldmat->ht_flag;
1992   a->ht_fact      = oldmat->ht_fact;
1993   a->ht_total_ct  = 0;
1994   a->ht_insert_ct = 0;
1995 
1996   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1997   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1998   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1999   if (oldmat->colmap) {
2000 #if defined (PETSC_USE_CTABLE)
2001     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2002 #else
2003     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2004     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2005     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2006 #endif
2007   } else a->colmap = 0;
2008 
2009   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2010     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2011     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2012     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2013   } else a->garray = 0;
2014 
2015   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2016   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2017   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2018   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2019 
2020   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2021   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2022   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2023   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2024 
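       /* slvec1a and slvec1b are views sharing slvec1's array: the first bs*mbs entries
          form the local part, the remainder the ghost part; no values are copied */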
2025   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2026   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2027   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2028   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2029   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2030   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2031   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2032   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2035   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2036   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2037   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2038 
2039   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2040   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2041   a->sMvctx = oldmat->sMvctx;
2042   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2043 
2044   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2045   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2046   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2047   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2048   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2049   *newmat = mat;
2050   PetscFunctionReturn(0);
2051 }
2052 
2053 #include "petscsys.h"
2054 
2055 #undef __FUNCT__
2056 #define __FUNCT__ "MatLoad_MPISBAIJ"
2057 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2058 {
2059   Mat            A;
2060   PetscErrorCode ierr;
2061   PetscInt       i,nz,j,rstart,rend;
2062   PetscScalar    *vals,*buf;
2063   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2064   MPI_Status     status;
2065   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2066   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2067   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2068   PetscInt       bs=1,Mbs,mbs,extra_rows;
2069   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2070   PetscInt       dcount,kmax,k,nzcount,tmp;
2071   int            fd;
2072 
2073   PetscFunctionBegin;
2074   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2075 
2076   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2077   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2078   if (!rank) {
2079     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2080     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2081     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2082     if (header[3] < 0) {
2083       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2084     }
2085   }
2086 
2087   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2088   M = header[1]; N = header[2];
2089 
2090   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2091 
2092   /*
2093      This code adds extra rows to make sure the number of rows is
2094      divisible by the blocksize
2095   */
2096   Mbs        = M/bs;
2097   extra_rows = bs - M + bs*(Mbs);
2098   if (extra_rows == bs) extra_rows = 0;
2099   else                  Mbs++;
2100   if (extra_rows && !rank) {
2101     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2102   }
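       /* e.g. M=10, bs=4: Mbs starts at 2 with extra_rows=2, so Mbs becomes 3 and two
          padding rows (given a unit diagonal below) are appended to reach 12 rows */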
2103 
2104   /* determine ownership of all rows */
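       /* deal block rows out as evenly as possible: the first Mbs%size processes own one extra block row */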
2105   mbs        = Mbs/size + ((Mbs % size) > rank);
2106   m          = mbs*bs;
2107   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2108   browners   = rowners + size + 1;
2109   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2110   rowners[0] = 0;
2111   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2112   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2113   rstart = rowners[rank];
2114   rend   = rowners[rank+1];
2115 
2116   /* distribute row lengths to all processors */
2117   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2118   if (!rank) {
2119     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2120     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2121     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2122     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2123     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2124     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2125     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2126   } else {
2127     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2128   }
2129 
2130   if (!rank) {   /* procs[0] */
2131     /* calculate the number of nonzeros on each processor */
2132     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2133     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2134     for (i=0; i<size; i++) {
2135       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2136         procsnz[i] += rowlengths[j];
2137       }
2138     }
2139     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2140 
2141     /* determine max buffer needed and allocate it */
2142     maxnz = 0;
2143     for (i=0; i<size; i++) {
2144       maxnz = PetscMax(maxnz,procsnz[i]);
2145     }
2146     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2147 
2148     /* read in my part of the matrix column indices  */
2149     nz     = procsnz[0];
2150     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2151     mycols = ibuf;
2152     if (size == 1)  nz -= extra_rows;
2153     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2154     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2155 
2156     /* read in the column indices for all the other processors (except the last one) and ship them off */
2157     for (i=1; i<size-1; i++) {
2158       nz   = procsnz[i];
2159       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2160       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2161     }
2162     /* read in the stuff for the last proc */
2163     if (size != 1) {
2164       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2165       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2166       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2167       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2168     }
2169     ierr = PetscFree(cols);CHKERRQ(ierr);
2170   } else {  /* procs[i], i>0 */
2171     /* determine buffer space needed for message */
2172     nz = 0;
2173     for (i=0; i<m; i++) {
2174       nz += locrowlens[i];
2175     }
2176     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2177     mycols = ibuf;
2178     /* receive message of column indices */
2179     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2180     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2181     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2182   }
2183 
2184   /* loop over local rows, determining number of off diagonal entries */
2185   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2186   odlens   = dlens + (rend-rstart);
2187   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2188   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2189   masked1  = mask    + Mbs;
2190   masked2  = masked1 + Mbs;
2191   rowcount = 0; nzcount = 0;
2192   for (i=0; i<mbs; i++) {
2193     dcount  = 0;
2194     odcount = 0;
2195     for (j=0; j<bs; j++) {
2196       kmax = locrowlens[rowcount];
2197       for (k=0; k<kmax; k++) {
2198         tmp = mycols[nzcount++]/bs; /* block col. index */
2199         if (!mask[tmp]) {
2200           mask[tmp] = 1;
2201           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2202           else masked1[dcount++] = tmp; /* entry in diag portion */
2203         }
2204       }
2205       rowcount++;
2206     }
2207 
2208     dlens[i]  = dcount;  /* d_nzz[i] */
2209     odlens[i] = odcount; /* o_nzz[i] */
2210 
2211     /* zero out the mask elements we set */
2212     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2213     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2214   }
2215 
2216   /* create our matrix */
2217   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2218   ierr = MatSetType(A,type);CHKERRQ(ierr);
2219   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2220   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2221 
2222   if (!rank) {
2223     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2224     /* read in my part of the matrix numerical values  */
2225     nz = procsnz[0];
2226     vals = buf;
2227     mycols = ibuf;
2228     if (size == 1)  nz -= extra_rows;
2229     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2230     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2231 
2232     /* insert into matrix */
2233     jj      = rstart*bs;
2234     for (i=0; i<m; i++) {
2235       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2236       mycols += locrowlens[i];
2237       vals   += locrowlens[i];
2238       jj++;
2239     }
2240 
2241     /* read in the values for the other processors (except the last one) and ship them out */
2242     for (i=1; i<size-1; i++) {
2243       nz   = procsnz[i];
2244       vals = buf;
2245       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2246       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2247     }
2248     /* the last proc */
2249     if (size != 1){
2250       nz   = procsnz[i] - extra_rows;
2251       vals = buf;
2252       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2253       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2254       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2255     }
2256     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2257 
2258   } else {
2259     /* receive numeric values */
2260     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2261 
2262     /* receive message of values */
2263     vals   = buf;
2264     mycols = ibuf;
2265     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2266     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2267     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2268 
2269     /* insert into matrix */
2270     jj      = rstart*bs;
2271     for (i=0; i<m; i++) {
2272       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2273       mycols += locrowlens[i];
2274       vals   += locrowlens[i];
2275       jj++;
2276     }
2277   }
2278 
2279   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2280   ierr = PetscFree(buf);CHKERRQ(ierr);
2281   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2282   ierr = PetscFree(rowners);CHKERRQ(ierr);
2283   ierr = PetscFree(dlens);CHKERRQ(ierr);
2284   ierr = PetscFree(mask);CHKERRQ(ierr);
2285   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2286   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2287   *newmat = A;
2288   PetscFunctionReturn(0);
2289 }
2290 
2291 #undef __FUNCT__
2292 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2293 /*XXXXX@
2294    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2295 
2296    Input Parameters:
2297 .  mat  - the matrix
2298 .  fact - factor
2299 
2300    Collective on Mat
2301 
2302    Level: advanced
2303 
2304   Notes:
2305    This can also be set by the command line option: -mat_use_hash_table fact
2306 
2307 .keywords: matrix, hashtable, factor, HT
2308 
2309 .seealso: MatSetOption()
2310 @XXXXX*/
2311 
2312 
2313 #undef __FUNCT__
2314 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2315 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2316 {
2317   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2318   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2319   PetscReal      atmp;
2320   PetscReal      *work,*svalues,*rvalues;
2321   PetscErrorCode ierr;
2322   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2323   PetscMPIInt    rank,size;
2324   PetscInt       *rowners_bs,dest,count,source;
2325   PetscScalar    *va;
2326   MatScalar      *ba;
2327   MPI_Status     stat;
2328 
2329   PetscFunctionBegin;
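       /* descriptive note: the diagonal block contributes directly to the local row maxima;
          because only the upper triangle is stored, each off-diagonal |entry| also contributes,
          via work[], to a row owned by a higher-ranked process, and those partial results
          are shipped to their owners below */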
2330   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2331   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2332 
2333   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2334   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2335 
2336   bs   = A->bs;
2337   mbs  = a->mbs;
2338   Mbs  = a->Mbs;
2339   ba   = b->a;
2340   bi   = b->i;
2341   bj   = b->j;
2342 
2343   /* find ownerships */
2344   rowners_bs = a->rowners_bs;
2345 
2346   /* each proc creates an array to be distributed */
2347   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2348   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2349 
2350   /* row_max for B */
2351   if (rank != size-1){
2352     for (i=0; i<mbs; i++) {
2353       ncols = bi[1] - bi[0]; bi++;
2354       brow  = bs*i;
2355       for (j=0; j<ncols; j++){
2356         bcol = bs*(*bj);
2357         for (kcol=0; kcol<bs; kcol++){
2358           col = bcol + kcol;                 /* local col index */
2359           col += rowners_bs[rank+1];      /* global col index */
2360           for (krow=0; krow<bs; krow++){
2361             atmp = PetscAbsScalar(*ba); ba++;
2362             row = brow + krow;    /* local row index */
2363             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2364             if (work[col] < atmp) work[col] = atmp;
2365           }
2366         }
2367         bj++;
2368       }
2369     }
2370 
2371     /* send values to its owners */
2372     for (dest=rank+1; dest<size; dest++){
2373       svalues = work + rowners_bs[dest];
2374       count   = rowners_bs[dest+1]-rowners_bs[dest];
2375       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2376     }
2377   }
2378 
2379   /* receive values */
2380   if (rank){
2381     rvalues = work;
2382     count   = rowners_bs[rank+1]-rowners_bs[rank];
2383     for (source=0; source<rank; source++){
2384       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2385       /* process values */
2386       for (i=0; i<count; i++){
2387         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2388       }
2389     }
2390   }
2391 
2392   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2393   ierr = PetscFree(work);CHKERRQ(ierr);
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatRelax_MPISBAIJ"
2399 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2400 {
2401   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2402   PetscErrorCode ierr;
2403   PetscInt       mbs=mat->mbs,bs=matin->bs;
2404   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2405   Vec            bb1;
2406 
2407   PetscFunctionBegin;
2408   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2409   if (bs > 1)
2410     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2411 
2412   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2413     if (flag & SOR_ZERO_INITIAL_GUESS) {
2414       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2415       its--;
2416     }
2417 
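         /* descriptive note: each sweep assembles bb1 = bb - (B + B^T)*xx with one forward
            scatter of slvec0 = [-xx; -B^T*xx] into slvec1 = [bb; 0], then applies the local
            symmetric relaxation on the diagonal block */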
2418     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2419     while (its--){
2420 
2421       /* lower triangular part: slvec0b = - B^T*xx */
2422       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2423 
2424       /* copy xx into slvec0a */
2425       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2426       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2427       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2428       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2429 
2430       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2431 
2432       /* copy bb into slvec1a */
2433       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2434       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2435       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2436       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2437 
2438       /* set slvec1b = 0 */
2439       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2440 
2441       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2442       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2443       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2444       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2445 
2446       /* upper triangular part: bb1 = bb1 - B*x */
2447       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2448 
2449       /* local diagonal sweep */
2450       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2451     }
2452     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2453   } else {
2454     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2455   }
2456   PetscFunctionReturn(0);
2457 }
2458 
2459 #undef __FUNCT__
2460 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2461 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2462 {
2463   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2464   PetscErrorCode ierr;
2465   PetscScalar    mone=-1.0;
2466   Vec            lvec1,bb1;
2467 
2468   PetscFunctionBegin;
2469   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2470   if (matin->bs > 1)
2471     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2472 
2473   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2474     if (flag & SOR_ZERO_INITIAL_GUESS) {
2475       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2476       its--;
2477     }
2478 
2479     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2480     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2481     while (its--){
2482       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2483 
2484       /* lower diagonal part: bb1 = bb - B^T*xx */
2485       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2486       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2487 
2488       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2489       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2490       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2491 
2492       /* upper diagonal part: bb1 = bb1 - B*x */
2493       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2494       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2495 
2496       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2497 
2498       /* diagonal sweep */
2499       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2500     }
2501     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2502     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2503   } else {
2504     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2505   }
2506   PetscFunctionReturn(0);
2507 }
2508 
2509