xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 6ab11f9b6ca3a3a4762ba278e5cdc46bee711a5b)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\
112  \
113         /* copy over old data into new slots */ \
114         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
115         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
116         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
117         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
118         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
119         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
120         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
122         /* free up old matrix storage */ \
123         ierr = MatSeqXAIJFreeAIJ(a->singlemalloc,a->a,a->j,a->i);CHKERRQ(ierr);\
124         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
125         a->singlemalloc = PETSC_TRUE; \
126  \
127         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
128         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
129         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
130         a->maxnz += bs2*CHUNKSIZE; \
131         a->reallocs++; \
132         a->nz++; \
133       } \
134       N = nrow++ - 1;  \
135       /* shift up all the later entries in this row */ \
136       for (ii=N; ii>=_i; ii--) { \
137         rp[ii+1] = rp[ii]; \
138         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
139       } \
140       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
141       rp[_i]                      = bcol;  \
142       ap[bs2*_i + bs*cidx + ridx] = value;  \
143       a_noinsert:; \
144     ailen[brow] = nrow; \
145 }
146 #ifndef MatSetValues_SeqBAIJ_B_Private
147 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
148 { \
149     brow = row/bs;  \
150     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
151     rmax = bimax[brow]; nrow = bilen[brow]; \
152       bcol = col/bs; \
153       ridx = row % bs; cidx = col % bs; \
154       low = 0; high = nrow; \
155       while (high-low > 3) { \
156         t = (low+high)/2; \
157         if (rp[t] > bcol) high = t; \
158         else              low  = t; \
159       } \
160       for (_i=low; _i<high; _i++) { \
161         if (rp[_i] > bcol) break; \
162         if (rp[_i] == bcol) { \
163           bap  = ap +  bs2*_i + bs*cidx + ridx; \
164           if (addv == ADD_VALUES) *bap += value;  \
165           else                    *bap  = value;  \
166           goto b_noinsert; \
167         } \
168       } \
169       if (b->nonew == 1) goto b_noinsert; \
170       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
171       if (nrow >= rmax) { \
172         /* there is no extra room in row, therefore enlarge */ \
173         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
174         MatScalar *new_a; \
175  \
176         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
177  \
178         /* malloc new storage space */ \
179         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,b->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\
180  \
181         /* copy over old data into new slots */ \
182         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
183         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
184         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
185         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
186         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
187         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
188         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
189         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE),ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
190         /* free up old matrix storage */ \
191         ierr = MatSeqXAIJFreeAIJ(b->singlemalloc,b->a,b->j,b->i);CHKERRQ(ierr);\
192         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
193         b->singlemalloc = PETSC_TRUE; \
194  \
195         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
196         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
197         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
198         b->maxnz += bs2*CHUNKSIZE; \
199         b->reallocs++; \
200         b->nz++; \
201       } \
202       N = nrow++ - 1;  \
203       /* shift up all the later entries in this row */ \
204       for (ii=N; ii>=_i; ii--) { \
205         rp[ii+1] = rp[ii]; \
206         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
207       } \
208       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
209       rp[_i]                      = bcol;  \
210       ap[bs2*_i + bs*cidx + ridx] = value;  \
211       b_noinsert:; \
212     bilen[brow] = nrow; \
213 }
214 #endif
215 
216 #if defined(PETSC_USE_MAT_SINGLE)
217 #undef __FUNCT__
218 #define __FUNCT__ "MatSetValues_MPISBAIJ"
219 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
220 {
221   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
222   PetscErrorCode ierr;
223   PetscInt       i,N = m*n;
224   MatScalar      *vsingle;
225 
226   PetscFunctionBegin;
227   if (N > b->setvalueslen) {
228     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
229     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
230     b->setvalueslen  = N;
231   }
232   vsingle = b->setvaluescopy;
233 
234   for (i=0; i<N; i++) {
235     vsingle[i] = v[i];
236   }
237   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
238   PetscFunctionReturn(0);
239 }
240 
241 #undef __FUNCT__
242 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
243 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
244 {
245   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
246   PetscErrorCode ierr;
247   PetscInt       i,N = m*n*b->bs2;
248   MatScalar      *vsingle;
249 
250   PetscFunctionBegin;
251   if (N > b->setvalueslen) {
252     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
253     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
254     b->setvalueslen  = N;
255   }
256   vsingle = b->setvaluescopy;
257   for (i=0; i<N; i++) {
258     vsingle[i] = v[i];
259   }
260   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
261   PetscFunctionReturn(0);
262 }
263 #endif
264 
265 /* Only add/insert a(i,j) with i<=j (blocks).
266    Any a(i,j) with i>j input by user is ingored.
267 */
268 #undef __FUNCT__
269 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
270 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
271 {
272   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
273   MatScalar      value;
274   PetscTruth     roworiented = baij->roworiented;
275   PetscErrorCode ierr;
276   PetscInt       i,j,row,col;
277   PetscInt       rstart_orig=baij->rstart_bs;
278   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
279   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
280 
281   /* Some Variables required in the macro */
282   Mat            A = baij->A;
283   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
284   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
285   MatScalar      *aa=a->a;
286 
287   Mat            B = baij->B;
288   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
289   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
290   MatScalar     *ba=b->a;
291 
292   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
293   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
294   MatScalar     *ap,*bap;
295 
296   /* for stash */
297   PetscInt      n_loc, *in_loc=0;
298   MatScalar     *v_loc=0;
299 
300   PetscFunctionBegin;
301 
302   if(!baij->donotstash){
303     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
304     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
305   }
306 
307   for (i=0; i<m; i++) {
308     if (im[i] < 0) continue;
309 #if defined(PETSC_USE_DEBUG)
310     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
311 #endif
312     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
313       row = im[i] - rstart_orig;              /* local row index */
314       for (j=0; j<n; j++) {
315         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
316         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
317           col = in[j] - cstart_orig;          /* local col index */
318           brow = row/bs; bcol = col/bs;
319           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
320           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
321           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
322           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
323         } else if (in[j] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
326 #endif
327         else {  /* off-diag entry (B) */
328           if (mat->was_assembled) {
329             if (!baij->colmap) {
330               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
331             }
332 #if defined (PETSC_USE_CTABLE)
333             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
334             col  = col - 1;
335 #else
336             col = baij->colmap[in[j]/bs] - 1;
337 #endif
338             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
339               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
340               col =  in[j];
341               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
342               B = baij->B;
343               b = (Mat_SeqBAIJ*)(B)->data;
344               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
345               ba=b->a;
346             } else col += in[j]%bs;
347           } else col = in[j];
348           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
349           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
350           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
351         }
352       }
353     } else {  /* off processor entry */
354       if (!baij->donotstash) {
355         n_loc = 0;
356         for (j=0; j<n; j++){
357           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
358           in_loc[n_loc] = in[j];
359           if (roworiented) {
360             v_loc[n_loc] = v[i*n+j];
361           } else {
362             v_loc[n_loc] = v[j*m+i];
363           }
364           n_loc++;
365         }
366         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
367       }
368     }
369   }
370 
371   if(!baij->donotstash){
372     ierr = PetscFree(in_loc);CHKERRQ(ierr);
373     ierr = PetscFree(v_loc);CHKERRQ(ierr);
374   }
375   PetscFunctionReturn(0);
376 }
377 
378 #undef __FUNCT__
379 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
380 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
381 {
382   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
383   const MatScalar *value;
384   MatScalar       *barray=baij->barray;
385   PetscTruth      roworiented = baij->roworiented;
386   PetscErrorCode  ierr;
387   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
388   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
389   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
390 
391   PetscFunctionBegin;
392   if(!barray) {
393     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
394     baij->barray = barray;
395   }
396 
397   if (roworiented) {
398     stepval = (n-1)*bs;
399   } else {
400     stepval = (m-1)*bs;
401   }
402   for (i=0; i<m; i++) {
403     if (im[i] < 0) continue;
404 #if defined(PETSC_USE_DEBUG)
405     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
406 #endif
407     if (im[i] >= rstart && im[i] < rend) {
408       row = im[i] - rstart;
409       for (j=0; j<n; j++) {
410         /* If NumCol = 1 then a copy is not required */
411         if ((roworiented) && (n == 1)) {
412           barray = (MatScalar*) v + i*bs2;
413         } else if((!roworiented) && (m == 1)) {
414           barray = (MatScalar*) v + j*bs2;
415         } else { /* Here a copy is required */
416           if (roworiented) {
417             value = v + i*(stepval+bs)*bs + j*bs;
418           } else {
419             value = v + j*(stepval+bs)*bs + i*bs;
420           }
421           for (ii=0; ii<bs; ii++,value+=stepval) {
422             for (jj=0; jj<bs; jj++) {
423               *barray++  = *value++;
424             }
425           }
426           barray -=bs2;
427         }
428 
429         if (in[j] >= cstart && in[j] < cend){
430           col  = in[j] - cstart;
431           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
432         }
433         else if (in[j] < 0) continue;
434 #if defined(PETSC_USE_DEBUG)
435         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
436 #endif
437         else {
438           if (mat->was_assembled) {
439             if (!baij->colmap) {
440               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
441             }
442 
443 #if defined(PETSC_USE_DEBUG)
444 #if defined (PETSC_USE_CTABLE)
445             { PetscInt data;
446               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
447               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
448             }
449 #else
450             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
451 #endif
452 #endif
453 #if defined (PETSC_USE_CTABLE)
454 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
455             col  = (col - 1)/bs;
456 #else
457             col = (baij->colmap[in[j]] - 1)/bs;
458 #endif
459             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
460               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
461               col =  in[j];
462             }
463           }
464           else col = in[j];
465           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
466         }
467       }
468     } else {
469       if (!baij->donotstash) {
470         if (roworiented) {
471           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
472         } else {
473           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
474         }
475       }
476     }
477   }
478   PetscFunctionReturn(0);
479 }
480 
481 #undef __FUNCT__
482 #define __FUNCT__ "MatGetValues_MPISBAIJ"
483 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
484 {
485   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
486   PetscErrorCode ierr;
487   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
488   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
489 
490   PetscFunctionBegin;
491   for (i=0; i<m; i++) {
492     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
493     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
494     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
495       row = idxm[i] - bsrstart;
496       for (j=0; j<n; j++) {
497         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
498         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
499         if (idxn[j] >= bscstart && idxn[j] < bscend){
500           col = idxn[j] - bscstart;
501           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
502         } else {
503           if (!baij->colmap) {
504             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
505           }
506 #if defined (PETSC_USE_CTABLE)
507           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
508           data --;
509 #else
510           data = baij->colmap[idxn[j]/bs]-1;
511 #endif
512           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
513           else {
514             col  = data + idxn[j]%bs;
515             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
516           }
517         }
518       }
519     } else {
520       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
521     }
522   }
523  PetscFunctionReturn(0);
524 }
525 
526 #undef __FUNCT__
527 #define __FUNCT__ "MatNorm_MPISBAIJ"
528 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
529 {
530   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
531   PetscErrorCode ierr;
532   PetscReal      sum[2],*lnorm2;
533 
534   PetscFunctionBegin;
535   if (baij->size == 1) {
536     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
537   } else {
538     if (type == NORM_FROBENIUS) {
539       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
540       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
541       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
542       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
543       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
544       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
545       *norm = sqrt(sum[0] + 2*sum[1]);
546       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
547     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
548       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
549       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
550       PetscReal    *rsum,*rsum2,vabs;
551       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
552       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
553       MatScalar    *v;
554 
555       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
556       rsum2 = rsum + mat->N;
557       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
558       /* Amat */
559       v = amat->a; jj = amat->j;
560       for (brow=0; brow<mbs; brow++) {
561         grow = bs*(rstart + brow);
562         nz = amat->i[brow+1] - amat->i[brow];
563         for (bcol=0; bcol<nz; bcol++){
564           gcol = bs*(rstart + *jj); jj++;
565           for (col=0; col<bs; col++){
566             for (row=0; row<bs; row++){
567               vabs = PetscAbsScalar(*v); v++;
568               rsum[gcol+col] += vabs;
569               /* non-diagonal block */
570               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
571             }
572           }
573         }
574       }
575       /* Bmat */
576       v = bmat->a; jj = bmat->j;
577       for (brow=0; brow<mbs; brow++) {
578         grow = bs*(rstart + brow);
579         nz = bmat->i[brow+1] - bmat->i[brow];
580         for (bcol=0; bcol<nz; bcol++){
581           gcol = bs*garray[*jj]; jj++;
582           for (col=0; col<bs; col++){
583             for (row=0; row<bs; row++){
584               vabs = PetscAbsScalar(*v); v++;
585               rsum[gcol+col] += vabs;
586               rsum[grow+row] += vabs;
587             }
588           }
589         }
590       }
591       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
592       *norm = 0.0;
593       for (col=0; col<mat->N; col++) {
594         if (rsum2[col] > *norm) *norm = rsum2[col];
595       }
596       ierr = PetscFree(rsum);CHKERRQ(ierr);
597     } else {
598       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
599     }
600   }
601   PetscFunctionReturn(0);
602 }
603 
604 #undef __FUNCT__
605 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
606 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
607 {
608   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
609   PetscErrorCode ierr;
610   PetscInt       nstash,reallocs;
611   InsertMode     addv;
612 
613   PetscFunctionBegin;
614   if (baij->donotstash) {
615     PetscFunctionReturn(0);
616   }
617 
618   /* make sure all processors are either in INSERTMODE or ADDMODE */
619   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
620   if (addv == (ADD_VALUES|INSERT_VALUES)) {
621     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
622   }
623   mat->insertmode = addv; /* in case this processor had no cache */
624 
625   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
626   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
627   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
628   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
629   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
630   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
631   PetscFunctionReturn(0);
632 }
633 
634 #undef __FUNCT__
635 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
636 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
637 {
638   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
639   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
640   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
641   PetscErrorCode ierr;
642   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
643   PetscInt       *row,*col,other_disassembled;
644   PetscMPIInt    n;
645   PetscTruth     r1,r2,r3;
646   MatScalar      *val;
647   InsertMode     addv = mat->insertmode;
648 
649   PetscFunctionBegin;
650 
651   if (!baij->donotstash) {
652     while (1) {
653       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
654       if (!flg) break;
655 
656       for (i=0; i<n;) {
657         /* Now identify the consecutive vals belonging to the same row */
658         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
659         if (j < n) ncols = j-i;
660         else       ncols = n-i;
661         /* Now assemble all these values with a single function call */
662         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
663         i = j;
664       }
665     }
666     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
667     /* Now process the block-stash. Since the values are stashed column-oriented,
668        set the roworiented flag to column oriented, and after MatSetValues()
669        restore the original flags */
670     r1 = baij->roworiented;
671     r2 = a->roworiented;
672     r3 = b->roworiented;
673     baij->roworiented = PETSC_FALSE;
674     a->roworiented    = PETSC_FALSE;
675     b->roworiented    = PETSC_FALSE;
676     while (1) {
677       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
678       if (!flg) break;
679 
680       for (i=0; i<n;) {
681         /* Now identify the consecutive vals belonging to the same row */
682         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
683         if (j < n) ncols = j-i;
684         else       ncols = n-i;
685         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
686         i = j;
687       }
688     }
689     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
690     baij->roworiented = r1;
691     a->roworiented    = r2;
692     b->roworiented    = r3;
693   }
694 
695   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
696   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
697 
698   /* determine if any processor has disassembled, if so we must
699      also disassemble ourselfs, in order that we may reassemble. */
700   /*
701      if nonzero structure of submatrix B cannot change then we know that
702      no processor disassembled thus we can skip this stuff
703   */
704   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
705     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
706     if (mat->was_assembled && !other_disassembled) {
707       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
708     }
709   }
710 
711   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
712     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
713   }
714   b->compressedrow.use = PETSC_TRUE;
715   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
716   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
717 
718   if (baij->rowvalues) {
719     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
720     baij->rowvalues = 0;
721   }
722 
723   PetscFunctionReturn(0);
724 }
725 
726 #undef __FUNCT__
727 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
728 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
729 {
730   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
731   PetscErrorCode    ierr;
732   PetscInt          bs = mat->bs;
733   PetscMPIInt       size = baij->size,rank = baij->rank;
734   PetscTruth        iascii,isdraw;
735   PetscViewer       sviewer;
736   PetscViewerFormat format;
737 
738   PetscFunctionBegin;
739   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
740   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
741   if (iascii) {
742     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
743     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
744       MatInfo info;
745       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
746       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
747       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
748               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
749               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
750       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
751       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
752       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
753       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
754       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
755       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
756       PetscFunctionReturn(0);
757     } else if (format == PETSC_VIEWER_ASCII_INFO) {
758       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
759       PetscFunctionReturn(0);
760     }
761   }
762 
763   if (isdraw) {
764     PetscDraw       draw;
765     PetscTruth isnull;
766     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
767     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
768   }
769 
770   if (size == 1) {
771     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
772     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
773   } else {
774     /* assemble the entire matrix onto first processor. */
775     Mat         A;
776     Mat_SeqSBAIJ *Aloc;
777     Mat_SeqBAIJ *Bloc;
778     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
779     MatScalar   *a;
780 
781     /* Should this be the same type as mat? */
782     if (!rank) {
783       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
784     } else {
785       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
786     }
787     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
788     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
789     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
790 
791     /* copy over the A part */
792     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
793     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
794     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
795 
796     for (i=0; i<mbs; i++) {
797       rvals[0] = bs*(baij->rstart + i);
798       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
799       for (j=ai[i]; j<ai[i+1]; j++) {
800         col = (baij->cstart+aj[j])*bs;
801         for (k=0; k<bs; k++) {
802           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
803           col++; a += bs;
804         }
805       }
806     }
807     /* copy over the B part */
808     Bloc = (Mat_SeqBAIJ*)baij->B->data;
809     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
810     for (i=0; i<mbs; i++) {
811       rvals[0] = bs*(baij->rstart + i);
812       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
813       for (j=ai[i]; j<ai[i+1]; j++) {
814         col = baij->garray[aj[j]]*bs;
815         for (k=0; k<bs; k++) {
816           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
817           col++; a += bs;
818         }
819       }
820     }
821     ierr = PetscFree(rvals);CHKERRQ(ierr);
822     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
823     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
824     /*
825        Everyone has to call to draw the matrix since the graphics waits are
826        synchronized across all processors that share the PetscDraw object
827     */
828     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
829     if (!rank) {
830       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
831       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
832     }
833     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
834     ierr = MatDestroy(A);CHKERRQ(ierr);
835   }
836   PetscFunctionReturn(0);
837 }
838 
839 #undef __FUNCT__
840 #define __FUNCT__ "MatView_MPISBAIJ"
841 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
842 {
843   PetscErrorCode ierr;
844   PetscTruth     iascii,isdraw,issocket,isbinary;
845 
846   PetscFunctionBegin;
847   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
848   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
849   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
850   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
851   if (iascii || isdraw || issocket || isbinary) {
852     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
853   } else {
854     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
855   }
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatDestroy_MPISBAIJ"
861 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
862 {
863   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
864   PetscErrorCode ierr;
865 
866   PetscFunctionBegin;
867 #if defined(PETSC_USE_LOG)
868   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
869 #endif
870   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
871   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
872   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
873   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
874   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
875 #if defined (PETSC_USE_CTABLE)
876   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
877 #else
878   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
879 #endif
880   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
881   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
882   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
883   if (baij->slvec0) {
884     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
885     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
886   }
887   if (baij->slvec1) {
888     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
889     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
890     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
891   }
892   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
893   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
894   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
895   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
896 #if defined(PETSC_USE_MAT_SINGLE)
897   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
898 #endif
899   ierr = PetscFree(baij);CHKERRQ(ierr);
900 
901   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
902   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
903   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
904   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
905   PetscFunctionReturn(0);
906 }
907 
908 #undef __FUNCT__
909 #define __FUNCT__ "MatMult_MPISBAIJ"
910 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
911 {
912   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
913   PetscErrorCode ierr;
914   PetscInt       nt,mbs=a->mbs,bs=A->bs;
915   PetscScalar    *x,*from,zero=0.0;
916 
917   PetscFunctionBegin;
918   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
919   if (nt != A->n) {
920     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
921   }
922   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
923   if (nt != A->m) {
924     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
925   }
926 
927   /* diagonal part */
928   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
929   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
930 
931   /* subdiagonal part */
932   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
933 
934   /* copy x into the vec slvec0 */
935   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
936   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
937   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
938   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
939 
940   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
941   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
942   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
943 
944   /* supperdiagonal part */
945   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
946 
947   PetscFunctionReturn(0);
948 }
949 
950 #undef __FUNCT__
951 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
952 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
953 {
954   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
955   PetscErrorCode ierr;
956   PetscInt       nt;
957 
958   PetscFunctionBegin;
959   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
960   if (nt != A->n) {
961     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
962   }
963   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
964   if (nt != A->m) {
965     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
966   }
967 
968   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
969   /* do diagonal part */
970   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
971   /* do supperdiagonal part */
972   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
973   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
974   /* do subdiagonal part */
975   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
976   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
977   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
978 
979   PetscFunctionReturn(0);
980 }
981 
982 #undef __FUNCT__
983 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
984 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
985 {
986   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
987   PetscErrorCode ierr;
988   PetscInt       mbs=a->mbs,bs=A->bs;
989   PetscScalar    *x,*from,zero=0.0;
990 
991   PetscFunctionBegin;
992   /*
993   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
994   PetscSynchronizedFlush(A->comm);
995   */
996   /* diagonal part */
997   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
998   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
999 
1000   /* subdiagonal part */
1001   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1002 
1003   /* copy x into the vec slvec0 */
1004   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1005   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1006   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1007   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1008 
1009   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1010   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1011   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1012 
1013   /* supperdiagonal part */
1014   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1015 
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1021 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1022 {
1023   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1024   PetscErrorCode ierr;
1025 
1026   PetscFunctionBegin;
1027   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1028   /* do diagonal part */
1029   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1030   /* do supperdiagonal part */
1031   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1032   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1033 
1034   /* do subdiagonal part */
1035   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1036   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1037   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1038 
1039   PetscFunctionReturn(0);
1040 }
1041 
1042 #undef __FUNCT__
1043 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1044 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1045 {
1046   PetscErrorCode ierr;
1047 
1048   PetscFunctionBegin;
1049   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1055 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   PetscErrorCode ierr;
1058 
1059   PetscFunctionBegin;
1060   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 /*
1065   This only works correctly for square matrices where the subblock A->A is the
1066    diagonal block
1067 */
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1070 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1071 {
1072   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1073   PetscErrorCode ierr;
1074 
1075   PetscFunctionBegin;
1076   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1077   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatScale_MPISBAIJ"
1083 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1084 {
1085   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1086   PetscErrorCode ierr;
1087 
1088   PetscFunctionBegin;
1089   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1090   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1091   PetscFunctionReturn(0);
1092 }
1093 
1094 #undef __FUNCT__
1095 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1096 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1097 {
1098   PetscFunctionBegin;
1099   SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1100   PetscFunctionReturn(0);
1101 }
1102 
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1105 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1106 {
1107   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1108 
1109   PetscFunctionBegin;
1110   if (!baij->getrowactive) {
1111     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1112   }
1113   baij->getrowactive = PETSC_FALSE;
1114   PetscFunctionReturn(0);
1115 }
1116 
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1119 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1120 {
1121   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1122   PetscErrorCode ierr;
1123 
1124   PetscFunctionBegin;
1125   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1126   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1132 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1133 {
1134   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1135   Mat            A = a->A,B = a->B;
1136   PetscErrorCode ierr;
1137   PetscReal      isend[5],irecv[5];
1138 
1139   PetscFunctionBegin;
1140   info->block_size     = (PetscReal)matin->bs;
1141   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1142   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1143   isend[3] = info->memory;  isend[4] = info->mallocs;
1144   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1145   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1146   isend[3] += info->memory;  isend[4] += info->mallocs;
1147   if (flag == MAT_LOCAL) {
1148     info->nz_used      = isend[0];
1149     info->nz_allocated = isend[1];
1150     info->nz_unneeded  = isend[2];
1151     info->memory       = isend[3];
1152     info->mallocs      = isend[4];
1153   } else if (flag == MAT_GLOBAL_MAX) {
1154     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1155     info->nz_used      = irecv[0];
1156     info->nz_allocated = irecv[1];
1157     info->nz_unneeded  = irecv[2];
1158     info->memory       = irecv[3];
1159     info->mallocs      = irecv[4];
1160   } else if (flag == MAT_GLOBAL_SUM) {
1161     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1162     info->nz_used      = irecv[0];
1163     info->nz_allocated = irecv[1];
1164     info->nz_unneeded  = irecv[2];
1165     info->memory       = irecv[3];
1166     info->mallocs      = irecv[4];
1167   } else {
1168     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1169   }
1170   info->rows_global       = (PetscReal)A->M;
1171   info->columns_global    = (PetscReal)A->N;
1172   info->rows_local        = (PetscReal)A->m;
1173   info->columns_local     = (PetscReal)A->N;
1174   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1175   info->fill_ratio_needed = 0;
1176   info->factor_mallocs    = 0;
1177   PetscFunctionReturn(0);
1178 }
1179 
1180 #undef __FUNCT__
1181 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1182 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1183 {
1184   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1185   PetscErrorCode ierr;
1186 
1187   PetscFunctionBegin;
1188   switch (op) {
1189   case MAT_NO_NEW_NONZERO_LOCATIONS:
1190   case MAT_YES_NEW_NONZERO_LOCATIONS:
1191   case MAT_COLUMNS_UNSORTED:
1192   case MAT_COLUMNS_SORTED:
1193   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1194   case MAT_KEEP_ZEROED_ROWS:
1195   case MAT_NEW_NONZERO_LOCATION_ERR:
1196     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1197     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1198     break;
1199   case MAT_ROW_ORIENTED:
1200     a->roworiented = PETSC_TRUE;
1201     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1202     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1203     break;
1204   case MAT_ROWS_SORTED:
1205   case MAT_ROWS_UNSORTED:
1206   case MAT_YES_NEW_DIAGONALS:
1207     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1208     break;
1209   case MAT_COLUMN_ORIENTED:
1210     a->roworiented = PETSC_FALSE;
1211     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1212     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1213     break;
1214   case MAT_IGNORE_OFF_PROC_ENTRIES:
1215     a->donotstash = PETSC_TRUE;
1216     break;
1217   case MAT_NO_NEW_DIAGONALS:
1218     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1219   case MAT_USE_HASH_TABLE:
1220     a->ht_flag = PETSC_TRUE;
1221     break;
1222   case MAT_NOT_SYMMETRIC:
1223   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1224   case MAT_HERMITIAN:
1225     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1226   case MAT_SYMMETRIC:
1227   case MAT_STRUCTURALLY_SYMMETRIC:
1228   case MAT_NOT_HERMITIAN:
1229   case MAT_SYMMETRY_ETERNAL:
1230   case MAT_NOT_SYMMETRY_ETERNAL:
1231     break;
1232   default:
1233     SETERRQ(PETSC_ERR_SUP,"unknown option");
1234   }
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1240 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1241 {
1242   PetscErrorCode ierr;
1243   PetscFunctionBegin;
1244   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1250 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1251 {
1252   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1253   Mat            a=baij->A, b=baij->B;
1254   PetscErrorCode ierr;
1255   PetscInt       nv,m,n;
1256   PetscTruth     flg;
1257 
1258   PetscFunctionBegin;
1259   if (ll != rr){
1260     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1261     if (!flg)
1262       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1263   }
1264   if (!ll) PetscFunctionReturn(0);
1265 
1266   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1267   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1268 
1269   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1270   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1271 
1272   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1273 
1274   /* left diagonalscale the off-diagonal part */
1275   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1276 
1277   /* scale the diagonal part */
1278   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1279 
1280   /* right diagonalscale the off-diagonal part */
1281   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1282   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1288 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1289 {
1290   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1291   MPI_Comm          comm = A->comm;
1292   static PetscTruth called = PETSC_FALSE;
1293   PetscErrorCode    ierr;
1294 
1295   PetscFunctionBegin;
1296   if (!a->rank) {
1297     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1298   }
1299   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1300   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1301   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1307 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1308 {
1309   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1310   PetscErrorCode ierr;
1311 
1312   PetscFunctionBegin;
1313   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1318 
1319 #undef __FUNCT__
1320 #define __FUNCT__ "MatEqual_MPISBAIJ"
1321 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1322 {
1323   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1324   Mat            a,b,c,d;
1325   PetscTruth     flg;
1326   PetscErrorCode ierr;
1327 
1328   PetscFunctionBegin;
1329   a = matA->A; b = matA->B;
1330   c = matB->A; d = matB->B;
1331 
1332   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1333   if (flg) {
1334     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1335   }
1336   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1342 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1343 {
1344   PetscErrorCode ierr;
1345 
1346   PetscFunctionBegin;
1347   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1348   PetscFunctionReturn(0);
1349 }
1350 
1351 #undef __FUNCT__
1352 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1353 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1354 {
1355   PetscErrorCode ierr;
1356   PetscInt       i;
1357   PetscTruth     flg;
1358 
1359   PetscFunctionBegin;
1360   for (i=0; i<n; i++) {
1361     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1362     if (!flg) {
1363       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1364     }
1365   }
1366   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 
1371 /* -------------------------------------------------------------------*/
1372 static struct _MatOps MatOps_Values = {
1373        MatSetValues_MPISBAIJ,
1374        MatGetRow_MPISBAIJ,
1375        MatRestoreRow_MPISBAIJ,
1376        MatMult_MPISBAIJ,
1377 /* 4*/ MatMultAdd_MPISBAIJ,
1378        MatMultTranspose_MPISBAIJ,
1379        MatMultTransposeAdd_MPISBAIJ,
1380        0,
1381        0,
1382        0,
1383 /*10*/ 0,
1384        0,
1385        0,
1386        MatRelax_MPISBAIJ,
1387        MatTranspose_MPISBAIJ,
1388 /*15*/ MatGetInfo_MPISBAIJ,
1389        MatEqual_MPISBAIJ,
1390        MatGetDiagonal_MPISBAIJ,
1391        MatDiagonalScale_MPISBAIJ,
1392        MatNorm_MPISBAIJ,
1393 /*20*/ MatAssemblyBegin_MPISBAIJ,
1394        MatAssemblyEnd_MPISBAIJ,
1395        0,
1396        MatSetOption_MPISBAIJ,
1397        MatZeroEntries_MPISBAIJ,
1398 /*25*/ 0,
1399        0,
1400        0,
1401        0,
1402        0,
1403 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1404        0,
1405        0,
1406        0,
1407        0,
1408 /*35*/ MatDuplicate_MPISBAIJ,
1409        0,
1410        0,
1411        0,
1412        0,
1413 /*40*/ 0,
1414        MatGetSubMatrices_MPISBAIJ,
1415        MatIncreaseOverlap_MPISBAIJ,
1416        MatGetValues_MPISBAIJ,
1417        0,
1418 /*45*/ MatPrintHelp_MPISBAIJ,
1419        MatScale_MPISBAIJ,
1420        0,
1421        0,
1422        0,
1423 /*50*/ 0,
1424        0,
1425        0,
1426        0,
1427        0,
1428 /*55*/ 0,
1429        0,
1430        MatSetUnfactored_MPISBAIJ,
1431        0,
1432        MatSetValuesBlocked_MPISBAIJ,
1433 /*60*/ 0,
1434        0,
1435        0,
1436        MatGetPetscMaps_Petsc,
1437        0,
1438 /*65*/ 0,
1439        0,
1440        0,
1441        0,
1442        0,
1443 /*70*/ MatGetRowMax_MPISBAIJ,
1444        0,
1445        0,
1446        0,
1447        0,
1448 /*75*/ 0,
1449        0,
1450        0,
1451        0,
1452        0,
1453 /*80*/ 0,
1454        0,
1455        0,
1456        0,
1457        MatLoad_MPISBAIJ,
1458 /*85*/ 0,
1459        0,
1460        0,
1461        0,
1462        0,
1463 /*90*/ 0,
1464        0,
1465        0,
1466        0,
1467        0,
1468 /*95*/ 0,
1469        0,
1470        0,
1471        0};
1472 
1473 
1474 EXTERN_C_BEGIN
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1477 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1478 {
1479   PetscFunctionBegin;
1480   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1481   *iscopy = PETSC_FALSE;
1482   PetscFunctionReturn(0);
1483 }
1484 EXTERN_C_END
1485 
1486 EXTERN_C_BEGIN
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1489 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1490 {
1491   Mat_MPISBAIJ   *b;
1492   PetscErrorCode ierr;
1493   PetscInt       i,mbs,Mbs;
1494 
1495   PetscFunctionBegin;
1496   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1497 
1498   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1499   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1500   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1501   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1502   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1503   if (d_nnz) {
1504     for (i=0; i<B->m/bs; i++) {
1505       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1506     }
1507   }
1508   if (o_nnz) {
1509     for (i=0; i<B->m/bs; i++) {
1510       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1511     }
1512   }
1513   B->preallocated = PETSC_TRUE;
1514   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1515   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1516   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1517   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1518 
1519   b   = (Mat_MPISBAIJ*)B->data;
1520   mbs = B->m/bs;
1521   Mbs = B->M/bs;
1522   if (mbs*bs != B->m) {
1523     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1524   }
1525 
1526   B->bs  = bs;
1527   b->bs2 = bs*bs;
1528   b->mbs = mbs;
1529   b->nbs = mbs;
1530   b->Mbs = Mbs;
1531   b->Nbs = Mbs;
1532 
1533   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1534   b->rowners[0]    = 0;
1535   for (i=2; i<=b->size; i++) {
1536     b->rowners[i] += b->rowners[i-1];
1537   }
1538   b->rstart    = b->rowners[b->rank];
1539   b->rend      = b->rowners[b->rank+1];
1540   b->cstart    = b->rstart;
1541   b->cend      = b->rend;
1542   for (i=0; i<=b->size; i++) {
1543     b->rowners_bs[i] = b->rowners[i]*bs;
1544   }
1545   b->rstart_bs = b-> rstart*bs;
1546   b->rend_bs   = b->rend*bs;
1547 
1548   b->cstart_bs = b->cstart*bs;
1549   b->cend_bs   = b->cend*bs;
1550 
1551   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1552   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1553   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1554   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1555 
1556   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1557   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1558   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1559   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1560 
1561   /* build cache for off array entries formed */
1562   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1563 
1564   PetscFunctionReturn(0);
1565 }
1566 EXTERN_C_END
1567 
1568 /*MC
1569    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1570    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1571 
1572    Options Database Keys:
1573 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1574 
1575   Level: beginner
1576 
1577 .seealso: MatCreateMPISBAIJ
1578 M*/
1579 
1580 EXTERN_C_BEGIN
1581 #undef __FUNCT__
1582 #define __FUNCT__ "MatCreate_MPISBAIJ"
1583 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1584 {
1585   Mat_MPISBAIJ   *b;
1586   PetscErrorCode ierr;
1587   PetscTruth     flg;
1588 
1589   PetscFunctionBegin;
1590 
1591   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1592   B->data = (void*)b;
1593   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1594 
1595   B->ops->destroy    = MatDestroy_MPISBAIJ;
1596   B->ops->view       = MatView_MPISBAIJ;
1597   B->mapping    = 0;
1598   B->factor     = 0;
1599   B->assembled  = PETSC_FALSE;
1600 
1601   B->insertmode = NOT_SET_VALUES;
1602   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1603   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1604 
1605   /* build local table of row and column ownerships */
1606   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1607   b->cowners    = b->rowners + b->size + 2;
1608   b->rowners_bs = b->cowners + b->size + 2;
1609   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1610 
1611   /* build cache for off array entries formed */
1612   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1613   b->donotstash  = PETSC_FALSE;
1614   b->colmap      = PETSC_NULL;
1615   b->garray      = PETSC_NULL;
1616   b->roworiented = PETSC_TRUE;
1617 
1618 #if defined(PETSC_USE_MAT_SINGLE)
1619   /* stuff for MatSetValues_XXX in single precision */
1620   b->setvalueslen     = 0;
1621   b->setvaluescopy    = PETSC_NULL;
1622 #endif
1623 
1624   /* stuff used in block assembly */
1625   b->barray       = 0;
1626 
1627   /* stuff used for matrix vector multiply */
1628   b->lvec         = 0;
1629   b->Mvctx        = 0;
1630   b->slvec0       = 0;
1631   b->slvec0b      = 0;
1632   b->slvec1       = 0;
1633   b->slvec1a      = 0;
1634   b->slvec1b      = 0;
1635   b->sMvctx       = 0;
1636 
1637   /* stuff for MatGetRow() */
1638   b->rowindices   = 0;
1639   b->rowvalues    = 0;
1640   b->getrowactive = PETSC_FALSE;
1641 
1642   /* hash table stuff */
1643   b->ht           = 0;
1644   b->hd           = 0;
1645   b->ht_size      = 0;
1646   b->ht_flag      = PETSC_FALSE;
1647   b->ht_fact      = 0;
1648   b->ht_total_ct  = 0;
1649   b->ht_insert_ct = 0;
1650 
1651   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1652   if (flg) {
1653     PetscReal fact = 1.39;
1654     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1655     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1656     if (fact <= 1.0) fact = 1.39;
1657     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1658     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1659   }
1660   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1661                                      "MatStoreValues_MPISBAIJ",
1662                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1663   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1664                                      "MatRetrieveValues_MPISBAIJ",
1665                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1666   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1667                                      "MatGetDiagonalBlock_MPISBAIJ",
1668                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1669   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1670                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1671                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1672   B->symmetric                  = PETSC_TRUE;
1673   B->structurally_symmetric     = PETSC_TRUE;
1674   B->symmetric_set              = PETSC_TRUE;
1675   B->structurally_symmetric_set = PETSC_TRUE;
1676   PetscFunctionReturn(0);
1677 }
1678 EXTERN_C_END
1679 
1680 /*MC
1681    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1682 
1683    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1684    and MATMPISBAIJ otherwise.
1685 
1686    Options Database Keys:
1687 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1688 
1689   Level: beginner
1690 
1691 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1692 M*/
1693 
1694 EXTERN_C_BEGIN
1695 #undef __FUNCT__
1696 #define __FUNCT__ "MatCreate_SBAIJ"
1697 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1698 {
1699   PetscErrorCode ierr;
1700   PetscMPIInt    size;
1701 
1702   PetscFunctionBegin;
1703   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1704   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1705   if (size == 1) {
1706     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1707   } else {
1708     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1709   }
1710   PetscFunctionReturn(0);
1711 }
1712 EXTERN_C_END
1713 
1714 #undef __FUNCT__
1715 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1716 /*@C
1717    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1718    the user should preallocate the matrix storage by setting the parameters
1719    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1720    performance can be increased by more than a factor of 50.
1721 
1722    Collective on Mat
1723 
1724    Input Parameters:
1725 +  A - the matrix
1726 .  bs   - size of blockk
1727 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1728            submatrix  (same for all local rows)
1729 .  d_nnz - array containing the number of block nonzeros in the various block rows
1730            in the upper triangular and diagonal part of the in diagonal portion of the local
1731            (possibly different for each block row) or PETSC_NULL.  You must leave room
1732            for the diagonal entry even if it is zero.
1733 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1734            submatrix (same for all local rows).
1735 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1736            off-diagonal portion of the local submatrix (possibly different for
1737            each block row) or PETSC_NULL.
1738 
1739 
1740    Options Database Keys:
1741 .   -mat_no_unroll - uses code that does not unroll the loops in the
1742                      block calculations (much slower)
1743 .   -mat_block_size - size of the blocks to use
1744 
1745    Notes:
1746 
1747    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1748    than it must be used on all processors that share the object for that argument.
1749 
1750    If the *_nnz parameter is given then the *_nz parameter is ignored
1751 
1752    Storage Information:
1753    For a square global matrix we define each processor's diagonal portion
1754    to be its local rows and the corresponding columns (a square submatrix);
1755    each processor's off-diagonal portion encompasses the remainder of the
1756    local matrix (a rectangular submatrix).
1757 
1758    The user can specify preallocated storage for the diagonal part of
1759    the local submatrix with either d_nz or d_nnz (not both).  Set
1760    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1761    memory allocation.  Likewise, specify preallocated storage for the
1762    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1763 
1764    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1765    the figure below we depict these three local rows and all columns (0-11).
1766 
1767 .vb
1768            0 1 2 3 4 5 6 7 8 9 10 11
1769           -------------------
1770    row 3  |  o o o d d d o o o o o o
1771    row 4  |  o o o d d d o o o o o o
1772    row 5  |  o o o d d d o o o o o o
1773           -------------------
1774 .ve
1775 
1776    Thus, any entries in the d locations are stored in the d (diagonal)
1777    submatrix, and any entries in the o locations are stored in the
1778    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1779    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1780 
1781    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1782    plus the diagonal part of the d matrix,
1783    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1784    In general, for PDE problems in which most nonzeros are near the diagonal,
1785    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1786    or you will get TERRIBLE performance; see the users' manual chapter on
1787    matrices.
1788 
1789    Level: intermediate
1790 
1791 .keywords: matrix, block, aij, compressed row, sparse, parallel
1792 
1793 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1794 @*/
1795 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1796 {
1797   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1798 
1799   PetscFunctionBegin;
1800   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1801   if (f) {
1802     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1803   }
1804   PetscFunctionReturn(0);
1805 }
1806 
1807 #undef __FUNCT__
1808 #define __FUNCT__ "MatCreateMPISBAIJ"
1809 /*@C
1810    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1811    (block compressed row).  For good matrix assembly performance
1812    the user should preallocate the matrix storage by setting the parameters
1813    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1814    performance can be increased by more than a factor of 50.
1815 
1816    Collective on MPI_Comm
1817 
1818    Input Parameters:
1819 +  comm - MPI communicator
1820 .  bs   - size of blockk
1821 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1822            This value should be the same as the local size used in creating the
1823            y vector for the matrix-vector product y = Ax.
1824 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1825            This value should be the same as the local size used in creating the
1826            x vector for the matrix-vector product y = Ax.
1827 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1828 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1829 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1830            submatrix  (same for all local rows)
1831 .  d_nnz - array containing the number of block nonzeros in the various block rows
1832            in the upper triangular portion of the in diagonal portion of the local
1833            (possibly different for each block block row) or PETSC_NULL.
1834            You must leave room for the diagonal entry even if it is zero.
1835 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1836            submatrix (same for all local rows).
1837 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1838            off-diagonal portion of the local submatrix (possibly different for
1839            each block row) or PETSC_NULL.
1840 
1841    Output Parameter:
1842 .  A - the matrix
1843 
1844    Options Database Keys:
1845 .   -mat_no_unroll - uses code that does not unroll the loops in the
1846                      block calculations (much slower)
1847 .   -mat_block_size - size of the blocks to use
1848 .   -mat_mpi - use the parallel matrix data structures even on one processor
1849                (defaults to using SeqBAIJ format on one processor)
1850 
1851    Notes:
1852    The number of rows and columns must be divisible by blocksize.
1853 
1854    The user MUST specify either the local or global matrix dimensions
1855    (possibly both).
1856 
1857    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1858    than it must be used on all processors that share the object for that argument.
1859 
1860    If the *_nnz parameter is given then the *_nz parameter is ignored
1861 
1862    Storage Information:
1863    For a square global matrix we define each processor's diagonal portion
1864    to be its local rows and the corresponding columns (a square submatrix);
1865    each processor's off-diagonal portion encompasses the remainder of the
1866    local matrix (a rectangular submatrix).
1867 
1868    The user can specify preallocated storage for the diagonal part of
1869    the local submatrix with either d_nz or d_nnz (not both).  Set
1870    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1871    memory allocation.  Likewise, specify preallocated storage for the
1872    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1873 
1874    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1875    the figure below we depict these three local rows and all columns (0-11).
1876 
1877 .vb
1878            0 1 2 3 4 5 6 7 8 9 10 11
1879           -------------------
1880    row 3  |  o o o d d d o o o o o o
1881    row 4  |  o o o d d d o o o o o o
1882    row 5  |  o o o d d d o o o o o o
1883           -------------------
1884 .ve
1885 
1886    Thus, any entries in the d locations are stored in the d (diagonal)
1887    submatrix, and any entries in the o locations are stored in the
1888    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1889    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1890 
1891    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1892    plus the diagonal part of the d matrix,
1893    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1894    In general, for PDE problems in which most nonzeros are near the diagonal,
1895    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1896    or you will get TERRIBLE performance; see the users' manual chapter on
1897    matrices.
1898 
1899    Level: intermediate
1900 
1901 .keywords: matrix, block, aij, compressed row, sparse, parallel
1902 
1903 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1904 @*/
1905 
1906 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1907 {
1908   PetscErrorCode ierr;
1909   PetscMPIInt    size;
1910 
1911   PetscFunctionBegin;
1912   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1913   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1914   if (size > 1) {
1915     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1916     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1917   } else {
1918     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1919     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1920   }
1921   PetscFunctionReturn(0);
1922 }
1923 
1924 
1925 #undef __FUNCT__
1926 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1927 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1928 {
1929   Mat            mat;
1930   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1931   PetscErrorCode ierr;
1932   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1933   PetscScalar    *array;
1934 
1935   PetscFunctionBegin;
1936   *newmat       = 0;
1937   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1938   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1939   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1940 
1941   mat->factor       = matin->factor;
1942   mat->preallocated = PETSC_TRUE;
1943   mat->assembled    = PETSC_TRUE;
1944   mat->insertmode   = NOT_SET_VALUES;
1945 
1946   a = (Mat_MPISBAIJ*)mat->data;
1947   mat->bs  = matin->bs;
1948   a->bs2   = oldmat->bs2;
1949   a->mbs   = oldmat->mbs;
1950   a->nbs   = oldmat->nbs;
1951   a->Mbs   = oldmat->Mbs;
1952   a->Nbs   = oldmat->Nbs;
1953 
1954   a->rstart       = oldmat->rstart;
1955   a->rend         = oldmat->rend;
1956   a->cstart       = oldmat->cstart;
1957   a->cend         = oldmat->cend;
1958   a->size         = oldmat->size;
1959   a->rank         = oldmat->rank;
1960   a->donotstash   = oldmat->donotstash;
1961   a->roworiented  = oldmat->roworiented;
1962   a->rowindices   = 0;
1963   a->rowvalues    = 0;
1964   a->getrowactive = PETSC_FALSE;
1965   a->barray       = 0;
1966   a->rstart_bs    = oldmat->rstart_bs;
1967   a->rend_bs      = oldmat->rend_bs;
1968   a->cstart_bs    = oldmat->cstart_bs;
1969   a->cend_bs      = oldmat->cend_bs;
1970 
1971   /* hash table stuff */
1972   a->ht           = 0;
1973   a->hd           = 0;
1974   a->ht_size      = 0;
1975   a->ht_flag      = oldmat->ht_flag;
1976   a->ht_fact      = oldmat->ht_fact;
1977   a->ht_total_ct  = 0;
1978   a->ht_insert_ct = 0;
1979 
1980   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1981   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1982   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1983   if (oldmat->colmap) {
1984 #if defined (PETSC_USE_CTABLE)
1985     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1986 #else
1987     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1988     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1989     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1990 #endif
1991   } else a->colmap = 0;
1992 
1993   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1994     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1995     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1996     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1997   } else a->garray = 0;
1998 
1999   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2000   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2001   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2002   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2003 
2004   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2005   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2006   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2007   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2008 
2009   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2010   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2011   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2012   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2013   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2014   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2015   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2016   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2017   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2018   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2019   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2020   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2021   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2022 
2023   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2024   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2025   a->sMvctx = oldmat->sMvctx;
2026   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2027 
2028   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2029   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2030   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2031   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2032   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2033   *newmat = mat;
2034   PetscFunctionReturn(0);
2035 }
2036 
2037 #include "petscsys.h"
2038 
2039 #undef __FUNCT__
2040 #define __FUNCT__ "MatLoad_MPISBAIJ"
2041 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2042 {
2043   Mat            A;
2044   PetscErrorCode ierr;
2045   PetscInt       i,nz,j,rstart,rend;
2046   PetscScalar    *vals,*buf;
2047   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2048   MPI_Status     status;
2049   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2050   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2051   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2052   PetscInt       bs=1,Mbs,mbs,extra_rows;
2053   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2054   PetscInt       dcount,kmax,k,nzcount,tmp;
2055   int            fd;
2056 
2057   PetscFunctionBegin;
2058   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2059 
2060   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2061   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2062   if (!rank) {
2063     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2064     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2065     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2066     if (header[3] < 0) {
2067       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2068     }
2069   }
2070 
2071   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2072   M = header[1]; N = header[2];
2073 
2074   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2075 
2076   /*
2077      This code adds extra rows to make sure the number of rows is
2078      divisible by the blocksize
2079   */
2080   Mbs        = M/bs;
2081   extra_rows = bs - M + bs*(Mbs);
2082   if (extra_rows == bs) extra_rows = 0;
2083   else                  Mbs++;
2084   if (extra_rows &&!rank) {
2085     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2086   }
2087 
2088   /* determine ownership of all rows */
2089   mbs        = Mbs/size + ((Mbs % size) > rank);
2090   m          = mbs*bs;
2091   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2092   browners   = rowners + size + 1;
2093   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2094   rowners[0] = 0;
2095   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2096   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2097   rstart = rowners[rank];
2098   rend   = rowners[rank+1];
2099 
2100   /* distribute row lengths to all processors */
2101   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2102   if (!rank) {
2103     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2104     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2105     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2106     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2107     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2108     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2109     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2110   } else {
2111     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2112   }
2113 
2114   if (!rank) {   /* procs[0] */
2115     /* calculate the number of nonzeros on each processor */
2116     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2117     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2118     for (i=0; i<size; i++) {
2119       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2120         procsnz[i] += rowlengths[j];
2121       }
2122     }
2123     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2124 
2125     /* determine max buffer needed and allocate it */
2126     maxnz = 0;
2127     for (i=0; i<size; i++) {
2128       maxnz = PetscMax(maxnz,procsnz[i]);
2129     }
2130     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2131 
2132     /* read in my part of the matrix column indices  */
2133     nz     = procsnz[0];
2134     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2135     mycols = ibuf;
2136     if (size == 1)  nz -= extra_rows;
2137     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2138     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2139 
2140     /* read in every ones (except the last) and ship off */
2141     for (i=1; i<size-1; i++) {
2142       nz   = procsnz[i];
2143       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2144       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2145     }
2146     /* read in the stuff for the last proc */
2147     if (size != 1) {
2148       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2149       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2150       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2151       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2152     }
2153     ierr = PetscFree(cols);CHKERRQ(ierr);
2154   } else {  /* procs[i], i>0 */
2155     /* determine buffer space needed for message */
2156     nz = 0;
2157     for (i=0; i<m; i++) {
2158       nz += locrowlens[i];
2159     }
2160     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2161     mycols = ibuf;
2162     /* receive message of column indices*/
2163     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2164     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2165     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2166   }
2167 
2168   /* loop over local rows, determining number of off diagonal entries */
2169   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2170   odlens   = dlens + (rend-rstart);
2171   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2172   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2173   masked1  = mask    + Mbs;
2174   masked2  = masked1 + Mbs;
2175   rowcount = 0; nzcount = 0;
2176   for (i=0; i<mbs; i++) {
2177     dcount  = 0;
2178     odcount = 0;
2179     for (j=0; j<bs; j++) {
2180       kmax = locrowlens[rowcount];
2181       for (k=0; k<kmax; k++) {
2182         tmp = mycols[nzcount++]/bs; /* block col. index */
2183         if (!mask[tmp]) {
2184           mask[tmp] = 1;
2185           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2186           else masked1[dcount++] = tmp; /* entry in diag portion */
2187         }
2188       }
2189       rowcount++;
2190     }
2191 
2192     dlens[i]  = dcount;  /* d_nzz[i] */
2193     odlens[i] = odcount; /* o_nzz[i] */
2194 
2195     /* zero out the mask elements we set */
2196     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2197     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2198   }
2199 
2200   /* create our matrix */
2201   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2202   ierr = MatSetType(A,type);CHKERRQ(ierr);
2203   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2204   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2205 
2206   if (!rank) {
2207     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2208     /* read in my part of the matrix numerical values  */
2209     nz = procsnz[0];
2210     vals = buf;
2211     mycols = ibuf;
2212     if (size == 1)  nz -= extra_rows;
2213     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2214     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2215 
2216     /* insert into matrix */
2217     jj      = rstart*bs;
2218     for (i=0; i<m; i++) {
2219       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2220       mycols += locrowlens[i];
2221       vals   += locrowlens[i];
2222       jj++;
2223     }
2224 
2225     /* read in other processors (except the last one) and ship out */
2226     for (i=1; i<size-1; i++) {
2227       nz   = procsnz[i];
2228       vals = buf;
2229       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2230       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2231     }
2232     /* the last proc */
2233     if (size != 1){
2234       nz   = procsnz[i] - extra_rows;
2235       vals = buf;
2236       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2237       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2238       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2239     }
2240     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2241 
2242   } else {
2243     /* receive numeric values */
2244     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2245 
2246     /* receive message of values*/
2247     vals   = buf;
2248     mycols = ibuf;
2249     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2250     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2251     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2252 
2253     /* insert into matrix */
2254     jj      = rstart*bs;
2255     for (i=0; i<m; i++) {
2256       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2257       mycols += locrowlens[i];
2258       vals   += locrowlens[i];
2259       jj++;
2260     }
2261   }
2262 
2263   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2264   ierr = PetscFree(buf);CHKERRQ(ierr);
2265   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2266   ierr = PetscFree(rowners);CHKERRQ(ierr);
2267   ierr = PetscFree(dlens);CHKERRQ(ierr);
2268   ierr = PetscFree(mask);CHKERRQ(ierr);
2269   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2270   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2271   *newmat = A;
2272   PetscFunctionReturn(0);
2273 }
2274 
2275 #undef __FUNCT__
2276 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2277 /*XXXXX@
2278    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2279 
2280    Input Parameters:
2281 .  mat  - the matrix
2282 .  fact - factor
2283 
2284    Collective on Mat
2285 
2286    Level: advanced
2287 
2288   Notes:
2289    This can also be set by the command line option: -mat_use_hash_table fact
2290 
2291 .keywords: matrix, hashtable, factor, HT
2292 
2293 .seealso: MatSetOption()
2294 @XXXXX*/
2295 
2296 
2297 #undef __FUNCT__
2298 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2299 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2300 {
2301   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2302   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2303   PetscReal      atmp;
2304   PetscReal      *work,*svalues,*rvalues;
2305   PetscErrorCode ierr;
2306   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2307   PetscMPIInt    rank,size;
2308   PetscInt       *rowners_bs,dest,count,source;
2309   PetscScalar    *va;
2310   MatScalar      *ba;
2311   MPI_Status     stat;
2312 
2313   PetscFunctionBegin;
2314   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2315   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2316 
2317   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2318   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2319 
2320   bs   = A->bs;
2321   mbs  = a->mbs;
2322   Mbs  = a->Mbs;
2323   ba   = b->a;
2324   bi   = b->i;
2325   bj   = b->j;
2326 
2327   /* find ownerships */
2328   rowners_bs = a->rowners_bs;
2329 
2330   /* each proc creates an array to be distributed */
2331   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2332   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2333 
2334   /* row_max for B */
2335   if (rank != size-1){
2336     for (i=0; i<mbs; i++) {
2337       ncols = bi[1] - bi[0]; bi++;
2338       brow  = bs*i;
2339       for (j=0; j<ncols; j++){
2340         bcol = bs*(*bj);
2341         for (kcol=0; kcol<bs; kcol++){
2342           col = bcol + kcol;                 /* local col index */
2343           col += rowners_bs[rank+1];      /* global col index */
2344           for (krow=0; krow<bs; krow++){
2345             atmp = PetscAbsScalar(*ba); ba++;
2346             row = brow + krow;    /* local row index */
2347             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2348             if (work[col] < atmp) work[col] = atmp;
2349           }
2350         }
2351         bj++;
2352       }
2353     }
2354 
2355     /* send values to its owners */
2356     for (dest=rank+1; dest<size; dest++){
2357       svalues = work + rowners_bs[dest];
2358       count   = rowners_bs[dest+1]-rowners_bs[dest];
2359       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2360     }
2361   }
2362 
2363   /* receive values */
2364   if (rank){
2365     rvalues = work;
2366     count   = rowners_bs[rank+1]-rowners_bs[rank];
2367     for (source=0; source<rank; source++){
2368       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2369       /* process values */
2370       for (i=0; i<count; i++){
2371         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2372       }
2373     }
2374   }
2375 
2376   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2377   ierr = PetscFree(work);CHKERRQ(ierr);
2378   PetscFunctionReturn(0);
2379 }
2380 
2381 #undef __FUNCT__
2382 #define __FUNCT__ "MatRelax_MPISBAIJ"
2383 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2384 {
2385   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2386   PetscErrorCode ierr;
2387   PetscInt       mbs=mat->mbs,bs=matin->bs;
2388   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2389   Vec            bb1;
2390 
2391   PetscFunctionBegin;
2392   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2393   if (bs > 1)
2394     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2395 
2396   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2397     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2398       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2399       its--;
2400     }
2401 
2402     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2403     while (its--){
2404 
2405       /* lower triangular part: slvec0b = - B^T*xx */
2406       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2407 
2408       /* copy xx into slvec0a */
2409       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2410       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2411       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2412       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2413 
2414       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2415 
2416       /* copy bb into slvec1a */
2417       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2418       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2419       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2420       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2421 
2422       /* set slvec1b = 0 */
2423       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2424 
2425       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2426       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2427       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2428       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2429 
2430       /* upper triangular part: bb1 = bb1 - B*x */
2431       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2432 
2433       /* local diagonal sweep */
2434       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2435     }
2436     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2437   } else {
2438     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2439   }
2440   PetscFunctionReturn(0);
2441 }
2442 
2443 #undef __FUNCT__
2444 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2445 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2446 {
2447   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2448   PetscErrorCode ierr;
2449   PetscScalar    mone=-1.0;
2450   Vec            lvec1,bb1;
2451 
2452   PetscFunctionBegin;
2453   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2454   if (matin->bs > 1)
2455     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2456 
2457   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2458     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2459       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2460       its--;
2461     }
2462 
2463     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2464     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2465     while (its--){
2466       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2467 
2468       /* lower diagonal part: bb1 = bb - B^T*xx */
2469       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2470       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2471 
2472       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2473       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2474       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2475 
2476       /* upper diagonal part: bb1 = bb1 - B*x */
2477       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2478       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2479 
2480       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2481 
2482       /* diagonal sweep */
2483       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2484     }
2485     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2486     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2487   } else {
2488     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2489   }
2490   PetscFunctionReturn(0);
2491 }
2492 
2493