xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 2b5b5c927f29f33489d0e993870bac2f9a4a8f8c)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (PetscInt*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (PetscInt*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #endif
279 
280 /* Only add/insert a(i,j) with i<=j (blocks).
281    Any a(i,j) with i>j input by user is ingored.
282 */
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
285 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
288   MatScalar      value;
289   PetscTruth     roworiented = baij->roworiented;
290   PetscErrorCode ierr;
291   PetscInt       i,j,row,col;
292   PetscInt       rstart_orig=baij->rstart_bs;
293   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
294   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
295 
296   /* Some Variables required in the macro */
297   Mat            A = baij->A;
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
299   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
300   MatScalar      *aa=a->a;
301 
302   Mat            B = baij->B;
303   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
304   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
305   MatScalar     *ba=b->a;
306 
307   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
308   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
309   MatScalar     *ap,*bap;
310 
311   /* for stash */
312   PetscInt      n_loc, *in_loc=0;
313   MatScalar     *v_loc=0;
314 
315   PetscFunctionBegin;
316 
317   if(!baij->donotstash){
318     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
319     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
320   }
321 
322   for (i=0; i<m; i++) {
323     if (im[i] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
326 #endif
327     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
328       row = im[i] - rstart_orig;              /* local row index */
329       for (j=0; j<n; j++) {
330         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
331         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
332           col = in[j] - cstart_orig;          /* local col index */
333           brow = row/bs; bcol = col/bs;
334           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
335           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
336           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
337           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
338         } else if (in[j] < 0) continue;
339 #if defined(PETSC_USE_DEBUG)
340         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
341 #endif
342         else {  /* off-diag entry (B) */
343           if (mat->was_assembled) {
344             if (!baij->colmap) {
345               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
346             }
347 #if defined (PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
349             col  = col - 1;
350 #else
351             col = baij->colmap[in[j]/bs] - 1;
352 #endif
353             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
354               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
355               col =  in[j];
356               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
357               B = baij->B;
358               b = (Mat_SeqBAIJ*)(B)->data;
359               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
360               ba=b->a;
361             } else col += in[j]%bs;
362           } else col = in[j];
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         }
367       }
368     } else {  /* off processor entry */
369       if (!baij->donotstash) {
370         n_loc = 0;
371         for (j=0; j<n; j++){
372           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
373           in_loc[n_loc] = in[j];
374           if (roworiented) {
375             v_loc[n_loc] = v[i*n+j];
376           } else {
377             v_loc[n_loc] = v[j*m+i];
378           }
379           n_loc++;
380         }
381         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
382       }
383     }
384   }
385 
386   if(!baij->donotstash){
387     ierr = PetscFree(in_loc);CHKERRQ(ierr);
388     ierr = PetscFree(v_loc);CHKERRQ(ierr);
389   }
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
395 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
396 {
397   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
398   const MatScalar *value;
399   MatScalar       *barray=baij->barray;
400   PetscTruth      roworiented = baij->roworiented;
401   PetscErrorCode  ierr;
402   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
403   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
404   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
405 
406   PetscFunctionBegin;
407   if(!barray) {
408     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
409     baij->barray = barray;
410   }
411 
412   if (roworiented) {
413     stepval = (n-1)*bs;
414   } else {
415     stepval = (m-1)*bs;
416   }
417   for (i=0; i<m; i++) {
418     if (im[i] < 0) continue;
419 #if defined(PETSC_USE_DEBUG)
420     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
421 #endif
422     if (im[i] >= rstart && im[i] < rend) {
423       row = im[i] - rstart;
424       for (j=0; j<n; j++) {
425         /* If NumCol = 1 then a copy is not required */
426         if ((roworiented) && (n == 1)) {
427           barray = (MatScalar*) v + i*bs2;
428         } else if((!roworiented) && (m == 1)) {
429           barray = (MatScalar*) v + j*bs2;
430         } else { /* Here a copy is required */
431           if (roworiented) {
432             value = v + i*(stepval+bs)*bs + j*bs;
433           } else {
434             value = v + j*(stepval+bs)*bs + i*bs;
435           }
436           for (ii=0; ii<bs; ii++,value+=stepval) {
437             for (jj=0; jj<bs; jj++) {
438               *barray++  = *value++;
439             }
440           }
441           barray -=bs2;
442         }
443 
444         if (in[j] >= cstart && in[j] < cend){
445           col  = in[j] - cstart;
446           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
447         }
448         else if (in[j] < 0) continue;
449 #if defined(PETSC_USE_DEBUG)
450         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
451 #endif
452         else {
453           if (mat->was_assembled) {
454             if (!baij->colmap) {
455               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
456             }
457 
458 #if defined(PETSC_USE_DEBUG)
459 #if defined (PETSC_USE_CTABLE)
460             { PetscInt data;
461               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
462               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463             }
464 #else
465             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
466 #endif
467 #endif
468 #if defined (PETSC_USE_CTABLE)
469 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
470             col  = (col - 1)/bs;
471 #else
472             col = (baij->colmap[in[j]] - 1)/bs;
473 #endif
474             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
475               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
476               col =  in[j];
477             }
478           }
479           else col = in[j];
480           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482       }
483     } else {
484       if (!baij->donotstash) {
485         if (roworiented) {
486           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487         } else {
488           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatGetValues_MPISBAIJ"
498 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
499 {
500   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
501   PetscErrorCode ierr;
502   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
503   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
504 
505   PetscFunctionBegin;
506   for (i=0; i<m; i++) {
507     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
508     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
509     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
510       row = idxm[i] - bsrstart;
511       for (j=0; j<n; j++) {
512         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
513         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
514         if (idxn[j] >= bscstart && idxn[j] < bscend){
515           col = idxn[j] - bscstart;
516           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
517         } else {
518           if (!baij->colmap) {
519             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
520           }
521 #if defined (PETSC_USE_CTABLE)
522           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
523           data --;
524 #else
525           data = baij->colmap[idxn[j]/bs]-1;
526 #endif
527           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
528           else {
529             col  = data + idxn[j]%bs;
530             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
531           }
532         }
533       }
534     } else {
535       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
536     }
537   }
538  PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatNorm_MPISBAIJ"
543 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
544 {
545   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
546   PetscErrorCode ierr;
547   PetscReal      sum[2],*lnorm2;
548 
549   PetscFunctionBegin;
550   if (baij->size == 1) {
551     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
552   } else {
553     if (type == NORM_FROBENIUS) {
554       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
555       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
556       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
557       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
558       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
559       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
560       *norm = sqrt(sum[0] + 2*sum[1]);
561       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
562     } else {
563       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
564     }
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
571 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
572 {
573   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
574   PetscErrorCode ierr;
575   PetscInt       nstash,reallocs;
576   InsertMode     addv;
577 
578   PetscFunctionBegin;
579   if (baij->donotstash) {
580     PetscFunctionReturn(0);
581   }
582 
583   /* make sure all processors are either in INSERTMODE or ADDMODE */
584   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
585   if (addv == (ADD_VALUES|INSERT_VALUES)) {
586     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
587   }
588   mat->insertmode = addv; /* in case this processor had no cache */
589 
590   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
591   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
592   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
593   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
594   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
595   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
601 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
602 {
603   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
604   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
605   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
606   PetscErrorCode ierr;
607   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
608   PetscInt       *row,*col,other_disassembled;
609   PetscMPIInt    n;
610   PetscTruth     r1,r2,r3;
611   MatScalar      *val;
612   InsertMode     addv = mat->insertmode;
613 
614   PetscFunctionBegin;
615 
616   if (!baij->donotstash) {
617     while (1) {
618       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
619       if (!flg) break;
620 
621       for (i=0; i<n;) {
622         /* Now identify the consecutive vals belonging to the same row */
623         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
624         if (j < n) ncols = j-i;
625         else       ncols = n-i;
626         /* Now assemble all these values with a single function call */
627         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
628         i = j;
629       }
630     }
631     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
632     /* Now process the block-stash. Since the values are stashed column-oriented,
633        set the roworiented flag to column oriented, and after MatSetValues()
634        restore the original flags */
635     r1 = baij->roworiented;
636     r2 = a->roworiented;
637     r3 = b->roworiented;
638     baij->roworiented = PETSC_FALSE;
639     a->roworiented    = PETSC_FALSE;
640     b->roworiented    = PETSC_FALSE;
641     while (1) {
642       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
643       if (!flg) break;
644 
645       for (i=0; i<n;) {
646         /* Now identify the consecutive vals belonging to the same row */
647         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
648         if (j < n) ncols = j-i;
649         else       ncols = n-i;
650         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
651         i = j;
652       }
653     }
654     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
655     baij->roworiented = r1;
656     a->roworiented    = r2;
657     b->roworiented    = r3;
658   }
659 
660   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
661   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
662 
663   /* determine if any processor has disassembled, if so we must
664      also disassemble ourselfs, in order that we may reassemble. */
665   /*
666      if nonzero structure of submatrix B cannot change then we know that
667      no processor disassembled thus we can skip this stuff
668   */
669   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
670     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
671     if (mat->was_assembled && !other_disassembled) {
672       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
673     }
674   }
675 
676   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
677     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
678   }
679   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
681 
682   if (baij->rowvalues) {
683     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
684     baij->rowvalues = 0;
685   }
686 
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
692 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
693 {
694   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
695   PetscErrorCode    ierr;
696   PetscInt          bs = mat->bs;
697   PetscMPIInt       size = baij->size,rank = baij->rank;
698   PetscTruth        iascii,isdraw;
699   PetscViewer       sviewer;
700   PetscViewerFormat format;
701 
702   PetscFunctionBegin;
703   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
704   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
705   if (iascii) {
706     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
707     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
708       MatInfo info;
709       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
710       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
711       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
712               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
713               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
714       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
715       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
716       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
717       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
718       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
719       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
720       PetscFunctionReturn(0);
721     } else if (format == PETSC_VIEWER_ASCII_INFO) {
722       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
723       PetscFunctionReturn(0);
724     }
725   }
726 
727   if (isdraw) {
728     PetscDraw       draw;
729     PetscTruth isnull;
730     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
731     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
732   }
733 
734   if (size == 1) {
735     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
736     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
737   } else {
738     /* assemble the entire matrix onto first processor. */
739     Mat         A;
740     Mat_SeqSBAIJ *Aloc;
741     Mat_SeqBAIJ *Bloc;
742     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
743     MatScalar   *a;
744 
745     /* Should this be the same type as mat? */
746     if (!rank) {
747       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
748     } else {
749       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
750     }
751     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
752     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
753     PetscLogObjectParent(mat,A);
754 
755     /* copy over the A part */
756     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
757     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
758     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
759 
760     for (i=0; i<mbs; i++) {
761       rvals[0] = bs*(baij->rstart + i);
762       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
763       for (j=ai[i]; j<ai[i+1]; j++) {
764         col = (baij->cstart+aj[j])*bs;
765         for (k=0; k<bs; k++) {
766           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
767           col++; a += bs;
768         }
769       }
770     }
771     /* copy over the B part */
772     Bloc = (Mat_SeqBAIJ*)baij->B->data;
773     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
774     for (i=0; i<mbs; i++) {
775       rvals[0] = bs*(baij->rstart + i);
776       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
777       for (j=ai[i]; j<ai[i+1]; j++) {
778         col = baij->garray[aj[j]]*bs;
779         for (k=0; k<bs; k++) {
780           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
781           col++; a += bs;
782         }
783       }
784     }
785     ierr = PetscFree(rvals);CHKERRQ(ierr);
786     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
787     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
788     /*
789        Everyone has to call to draw the matrix since the graphics waits are
790        synchronized across all processors that share the PetscDraw object
791     */
792     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
793     if (!rank) {
794       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
795       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
796     }
797     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
798     ierr = MatDestroy(A);CHKERRQ(ierr);
799   }
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatView_MPISBAIJ"
805 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
806 {
807   PetscErrorCode ierr;
808   PetscTruth     iascii,isdraw,issocket,isbinary;
809 
810   PetscFunctionBegin;
811   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
812   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
814   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
815   if (iascii || isdraw || issocket || isbinary) {
816     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
817   } else {
818     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatDestroy_MPISBAIJ"
825 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
826 {
827   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
828   PetscErrorCode ierr;
829 
830   PetscFunctionBegin;
831 #if defined(PETSC_USE_LOG)
832   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
833 #endif
834   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
835   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
836   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
837   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
838   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
839 #if defined (PETSC_USE_CTABLE)
840   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
841 #else
842   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
843 #endif
844   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
845   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
846   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
847   if (baij->slvec0) {
848     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
849     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
850   }
851   if (baij->slvec1) {
852     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
853     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
854     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
855   }
856   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
857   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
858   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
859   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
860 #if defined(PETSC_USE_MAT_SINGLE)
861   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
862 #endif
863   ierr = PetscFree(baij);CHKERRQ(ierr);
864 
865   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
866   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
867   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
868   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "MatMult_MPISBAIJ"
874 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
875 {
876   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
877   PetscErrorCode ierr;
878   PetscInt       nt,mbs=a->mbs,bs=A->bs;
879   PetscScalar    *x,*from,zero=0.0;
880 
881   PetscFunctionBegin;
882   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
883   if (nt != A->n) {
884     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
885   }
886   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
887   if (nt != A->m) {
888     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
889   }
890 
891   /* diagonal part */
892   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
893   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
894 
895   /* subdiagonal part */
896   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
897 
898   /* copy x into the vec slvec0 */
899   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
900   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
901   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
902   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
903 
904   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
905   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
906   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
907 
908   /* supperdiagonal part */
909   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
910 
911   PetscFunctionReturn(0);
912 }
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
916 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
917 {
918   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
919   PetscErrorCode ierr;
920   PetscInt       nt;
921 
922   PetscFunctionBegin;
923   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
924   if (nt != A->n) {
925     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
926   }
927   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
928   if (nt != A->m) {
929     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
930   }
931 
932   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
933   /* do diagonal part */
934   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
935   /* do supperdiagonal part */
936   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
937   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
938   /* do subdiagonal part */
939   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
940   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
941   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
942 
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
948 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
949 {
950   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
951   PetscErrorCode ierr;
952   PetscInt       mbs=a->mbs,bs=A->bs;
953   PetscScalar    *x,*from,zero=0.0;
954 
955   PetscFunctionBegin;
956   /*
957   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
958   PetscSynchronizedFlush(A->comm);
959   */
960   /* diagonal part */
961   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
962   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
963 
964   /* subdiagonal part */
965   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
966 
967   /* copy x into the vec slvec0 */
968   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
969   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
970   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
971   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
972 
973   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
974   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
975   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
976 
977   /* supperdiagonal part */
978   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
979 
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
985 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
986 {
987   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
988   PetscErrorCode ierr;
989 
990   PetscFunctionBegin;
991   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
992   /* do diagonal part */
993   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
994   /* do supperdiagonal part */
995   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
997 
998   /* do subdiagonal part */
999   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1002 
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1008 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1009 {
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1019 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1020 {
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 /*
1029   This only works correctly for square matrices where the subblock A->A is the
1030    diagonal block
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1034 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1035 {
1036   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038 
1039   PetscFunctionBegin;
1040   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1041   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatScale_MPISBAIJ"
1047 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1048 {
1049   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1050   PetscErrorCode ierr;
1051 
1052   PetscFunctionBegin;
1053   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1054   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1060 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1061 {
1062   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1063   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1064   PetscErrorCode ierr;
1065   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1066   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1067   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1068 
1069   PetscFunctionBegin;
1070   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1071   mat->getrowactive = PETSC_TRUE;
1072 
1073   if (!mat->rowvalues && (idx || v)) {
1074     /*
1075         allocate enough space to hold information from the longest row.
1076     */
1077     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1078     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1079     PetscInt     max = 1,mbs = mat->mbs,tmp;
1080     for (i=0; i<mbs; i++) {
1081       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1082       if (max < tmp) { max = tmp; }
1083     }
1084     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1085     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1086   }
1087 
1088   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1089   lrow = row - brstart;  /* local row index */
1090 
1091   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1092   if (!v)   {pvA = 0; pvB = 0;}
1093   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1094   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1095   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1096   nztot = nzA + nzB;
1097 
1098   cmap  = mat->garray;
1099   if (v  || idx) {
1100     if (nztot) {
1101       /* Sort by increasing column numbers, assuming A and B already sorted */
1102       PetscInt imark = -1;
1103       if (v) {
1104         *v = v_p = mat->rowvalues;
1105         for (i=0; i<nzB; i++) {
1106           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1107           else break;
1108         }
1109         imark = i;
1110         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1111         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1112       }
1113       if (idx) {
1114         *idx = idx_p = mat->rowindices;
1115         if (imark > -1) {
1116           for (i=0; i<imark; i++) {
1117             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1118           }
1119         } else {
1120           for (i=0; i<nzB; i++) {
1121             if (cmap[cworkB[i]/bs] < cstart)
1122               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1123             else break;
1124           }
1125           imark = i;
1126         }
1127         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1128         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1129       }
1130     } else {
1131       if (idx) *idx = 0;
1132       if (v)   *v   = 0;
1133     }
1134   }
1135   *nz = nztot;
1136   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1137   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1143 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1144 {
1145   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1146 
1147   PetscFunctionBegin;
1148   if (baij->getrowactive == PETSC_FALSE) {
1149     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1150   }
1151   baij->getrowactive = PETSC_FALSE;
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1157 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1158 {
1159   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1164   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1170 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1171 {
1172   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1173   Mat            A = a->A,B = a->B;
1174   PetscErrorCode ierr;
1175   PetscReal      isend[5],irecv[5];
1176 
1177   PetscFunctionBegin;
1178   info->block_size     = (PetscReal)matin->bs;
1179   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1180   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1181   isend[3] = info->memory;  isend[4] = info->mallocs;
1182   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1183   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1184   isend[3] += info->memory;  isend[4] += info->mallocs;
1185   if (flag == MAT_LOCAL) {
1186     info->nz_used      = isend[0];
1187     info->nz_allocated = isend[1];
1188     info->nz_unneeded  = isend[2];
1189     info->memory       = isend[3];
1190     info->mallocs      = isend[4];
1191   } else if (flag == MAT_GLOBAL_MAX) {
1192     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1193     info->nz_used      = irecv[0];
1194     info->nz_allocated = irecv[1];
1195     info->nz_unneeded  = irecv[2];
1196     info->memory       = irecv[3];
1197     info->mallocs      = irecv[4];
1198   } else if (flag == MAT_GLOBAL_SUM) {
1199     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1200     info->nz_used      = irecv[0];
1201     info->nz_allocated = irecv[1];
1202     info->nz_unneeded  = irecv[2];
1203     info->memory       = irecv[3];
1204     info->mallocs      = irecv[4];
1205   } else {
1206     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1207   }
1208   info->rows_global       = (PetscReal)A->M;
1209   info->columns_global    = (PetscReal)A->N;
1210   info->rows_local        = (PetscReal)A->m;
1211   info->columns_local     = (PetscReal)A->N;
1212   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1213   info->fill_ratio_needed = 0;
1214   info->factor_mallocs    = 0;
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1220 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1221 {
1222   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   switch (op) {
1227   case MAT_NO_NEW_NONZERO_LOCATIONS:
1228   case MAT_YES_NEW_NONZERO_LOCATIONS:
1229   case MAT_COLUMNS_UNSORTED:
1230   case MAT_COLUMNS_SORTED:
1231   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1232   case MAT_KEEP_ZEROED_ROWS:
1233   case MAT_NEW_NONZERO_LOCATION_ERR:
1234     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1235     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1236     break;
1237   case MAT_ROW_ORIENTED:
1238     a->roworiented = PETSC_TRUE;
1239     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1240     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1241     break;
1242   case MAT_ROWS_SORTED:
1243   case MAT_ROWS_UNSORTED:
1244   case MAT_YES_NEW_DIAGONALS:
1245     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1246     break;
1247   case MAT_COLUMN_ORIENTED:
1248     a->roworiented = PETSC_FALSE;
1249     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1250     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1251     break;
1252   case MAT_IGNORE_OFF_PROC_ENTRIES:
1253     a->donotstash = PETSC_TRUE;
1254     break;
1255   case MAT_NO_NEW_DIAGONALS:
1256     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1257   case MAT_USE_HASH_TABLE:
1258     a->ht_flag = PETSC_TRUE;
1259     break;
1260   case MAT_NOT_SYMMETRIC:
1261   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1262   case MAT_HERMITIAN:
1263     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_NOT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268   case MAT_NOT_SYMMETRY_ETERNAL:
1269     break;
1270   default:
1271     SETERRQ(PETSC_ERR_SUP,"unknown option");
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1278 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1279 {
1280   PetscErrorCode ierr;
1281   PetscFunctionBegin;
1282   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1288 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1289 {
1290   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1291   Mat            a = baij->A,b = baij->B;
1292   PetscErrorCode ierr;
1293   PetscInt       s1,s2,s3;
1294 
1295   PetscFunctionBegin;
1296   if (ll != rr) {
1297     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1298   }
1299   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1300   if (rr) {
1301     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1302     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1303     /* Overlap communication with computation. */
1304     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1305     /*} if (ll) { */
1306     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1307     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1308     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1309     /* } */
1310   /* scale  the diagonal block */
1311   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1312 
1313   /* if (rr) { */
1314     /* Do a scatter end and then right scale the off-diagonal block */
1315     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1316     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1317   }
1318 
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1324 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1325 {
1326   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1327   MPI_Comm          comm = A->comm;
1328   static PetscTruth called = PETSC_FALSE;
1329   PetscErrorCode    ierr;
1330 
1331   PetscFunctionBegin;
1332   if (!a->rank) {
1333     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1334   }
1335   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1336   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1337   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1343 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1344 {
1345   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatEqual_MPISBAIJ"
1357 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1358 {
1359   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1360   Mat            a,b,c,d;
1361   PetscTruth     flg;
1362   PetscErrorCode ierr;
1363 
1364   PetscFunctionBegin;
1365   a = matA->A; b = matA->B;
1366   c = matB->A; d = matB->B;
1367 
1368   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1369   if (flg == PETSC_TRUE) {
1370     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1371   }
1372   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1378 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1379 {
1380   PetscErrorCode ierr;
1381 
1382   PetscFunctionBegin;
1383   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1389 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1390 {
1391   PetscErrorCode ierr;
1392   PetscInt       i;
1393   PetscTruth     flg;
1394 
1395   PetscFunctionBegin;
1396   for (i=0; i<n; i++) {
1397     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1398     if (!flg) {
1399       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1400     }
1401   }
1402   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 
1407 /* -------------------------------------------------------------------*/
1408 static struct _MatOps MatOps_Values = {
1409        MatSetValues_MPISBAIJ,
1410        MatGetRow_MPISBAIJ,
1411        MatRestoreRow_MPISBAIJ,
1412        MatMult_MPISBAIJ,
1413 /* 4*/ MatMultAdd_MPISBAIJ,
1414        MatMultTranspose_MPISBAIJ,
1415        MatMultTransposeAdd_MPISBAIJ,
1416        0,
1417        0,
1418        0,
1419 /*10*/ 0,
1420        0,
1421        0,
1422        MatRelax_MPISBAIJ,
1423        MatTranspose_MPISBAIJ,
1424 /*15*/ MatGetInfo_MPISBAIJ,
1425        MatEqual_MPISBAIJ,
1426        MatGetDiagonal_MPISBAIJ,
1427        MatDiagonalScale_MPISBAIJ,
1428        MatNorm_MPISBAIJ,
1429 /*20*/ MatAssemblyBegin_MPISBAIJ,
1430        MatAssemblyEnd_MPISBAIJ,
1431        0,
1432        MatSetOption_MPISBAIJ,
1433        MatZeroEntries_MPISBAIJ,
1434 /*25*/ 0,
1435        0,
1436        0,
1437        0,
1438        0,
1439 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1440        0,
1441        0,
1442        0,
1443        0,
1444 /*35*/ MatDuplicate_MPISBAIJ,
1445        0,
1446        0,
1447        0,
1448        0,
1449 /*40*/ 0,
1450        MatGetSubMatrices_MPISBAIJ,
1451        MatIncreaseOverlap_MPISBAIJ,
1452        MatGetValues_MPISBAIJ,
1453        0,
1454 /*45*/ MatPrintHelp_MPISBAIJ,
1455        MatScale_MPISBAIJ,
1456        0,
1457        0,
1458        0,
1459 /*50*/ 0,
1460        0,
1461        0,
1462        0,
1463        0,
1464 /*55*/ 0,
1465        0,
1466        MatSetUnfactored_MPISBAIJ,
1467        0,
1468        MatSetValuesBlocked_MPISBAIJ,
1469 /*60*/ 0,
1470        0,
1471        0,
1472        MatGetPetscMaps_Petsc,
1473        0,
1474 /*65*/ 0,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*70*/ MatGetRowMax_MPISBAIJ,
1480        0,
1481        0,
1482        0,
1483        0,
1484 /*75*/ 0,
1485        0,
1486        0,
1487        0,
1488        0,
1489 /*80*/ 0,
1490        0,
1491        0,
1492        0,
1493        MatLoad_MPISBAIJ,
1494 /*85*/ 0,
1495        0,
1496        0,
1497        0,
1498        0,
1499 /*90*/ 0,
1500        0,
1501        0,
1502        0,
1503        0,
1504 /*95*/ 0,
1505        0,
1506        0,
1507        0};
1508 
1509 
1510 EXTERN_C_BEGIN
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1513 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1514 {
1515   PetscFunctionBegin;
1516   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1517   *iscopy = PETSC_FALSE;
1518   PetscFunctionReturn(0);
1519 }
1520 EXTERN_C_END
1521 
1522 EXTERN_C_BEGIN
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1525 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1526 {
1527   Mat_MPISBAIJ   *b;
1528   PetscErrorCode ierr;
1529   PetscInt       i,mbs,Mbs;
1530 
1531   PetscFunctionBegin;
1532   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1533 
1534   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1535   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1536   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1537   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1538   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1539   if (d_nnz) {
1540     for (i=0; i<B->m/bs; i++) {
1541       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1542     }
1543   }
1544   if (o_nnz) {
1545     for (i=0; i<B->m/bs; i++) {
1546       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1547     }
1548   }
1549   B->preallocated = PETSC_TRUE;
1550   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1551   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1552   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1553   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1554 
1555   b   = (Mat_MPISBAIJ*)B->data;
1556   mbs = B->m/bs;
1557   Mbs = B->M/bs;
1558   if (mbs*bs != B->m) {
1559     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1560   }
1561 
1562   B->bs  = bs;
1563   b->bs2 = bs*bs;
1564   b->mbs = mbs;
1565   b->nbs = mbs;
1566   b->Mbs = Mbs;
1567   b->Nbs = Mbs;
1568 
1569   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1570   b->rowners[0]    = 0;
1571   for (i=2; i<=b->size; i++) {
1572     b->rowners[i] += b->rowners[i-1];
1573   }
1574   b->rstart    = b->rowners[b->rank];
1575   b->rend      = b->rowners[b->rank+1];
1576   b->cstart    = b->rstart;
1577   b->cend      = b->rend;
1578   for (i=0; i<=b->size; i++) {
1579     b->rowners_bs[i] = b->rowners[i]*bs;
1580   }
1581   b->rstart_bs = b-> rstart*bs;
1582   b->rend_bs   = b->rend*bs;
1583 
1584   b->cstart_bs = b->cstart*bs;
1585   b->cend_bs   = b->cend*bs;
1586 
1587   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1588   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1589   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1590   PetscLogObjectParent(B,b->A);
1591 
1592   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1593   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1594   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1595   PetscLogObjectParent(B,b->B);
1596 
1597   /* build cache for off array entries formed */
1598   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1599 
1600   PetscFunctionReturn(0);
1601 }
1602 EXTERN_C_END
1603 
1604 /*MC
1605    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1606    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1607 
1608    Options Database Keys:
1609 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1610 
1611   Level: beginner
1612 
1613 .seealso: MatCreateMPISBAIJ
1614 M*/
1615 
1616 EXTERN_C_BEGIN
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatCreate_MPISBAIJ"
1619 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1620 {
1621   Mat_MPISBAIJ   *b;
1622   PetscErrorCode ierr;
1623   PetscTruth     flg;
1624 
1625   PetscFunctionBegin;
1626 
1627   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1628   B->data = (void*)b;
1629   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1630 
1631   B->ops->destroy    = MatDestroy_MPISBAIJ;
1632   B->ops->view       = MatView_MPISBAIJ;
1633   B->mapping    = 0;
1634   B->factor     = 0;
1635   B->assembled  = PETSC_FALSE;
1636 
1637   B->insertmode = NOT_SET_VALUES;
1638   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1639   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1640 
1641   /* build local table of row and column ownerships */
1642   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1643   b->cowners    = b->rowners + b->size + 2;
1644   b->rowners_bs = b->cowners + b->size + 2;
1645   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1646 
1647   /* build cache for off array entries formed */
1648   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1649   b->donotstash  = PETSC_FALSE;
1650   b->colmap      = PETSC_NULL;
1651   b->garray      = PETSC_NULL;
1652   b->roworiented = PETSC_TRUE;
1653 
1654 #if defined(PETSC_USE_MAT_SINGLE)
1655   /* stuff for MatSetValues_XXX in single precision */
1656   b->setvalueslen     = 0;
1657   b->setvaluescopy    = PETSC_NULL;
1658 #endif
1659 
1660   /* stuff used in block assembly */
1661   b->barray       = 0;
1662 
1663   /* stuff used for matrix vector multiply */
1664   b->lvec         = 0;
1665   b->Mvctx        = 0;
1666   b->slvec0       = 0;
1667   b->slvec0b      = 0;
1668   b->slvec1       = 0;
1669   b->slvec1a      = 0;
1670   b->slvec1b      = 0;
1671   b->sMvctx       = 0;
1672 
1673   /* stuff for MatGetRow() */
1674   b->rowindices   = 0;
1675   b->rowvalues    = 0;
1676   b->getrowactive = PETSC_FALSE;
1677 
1678   /* hash table stuff */
1679   b->ht           = 0;
1680   b->hd           = 0;
1681   b->ht_size      = 0;
1682   b->ht_flag      = PETSC_FALSE;
1683   b->ht_fact      = 0;
1684   b->ht_total_ct  = 0;
1685   b->ht_insert_ct = 0;
1686 
1687   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1688   if (flg) {
1689     PetscReal fact = 1.39;
1690     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1691     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1692     if (fact <= 1.0) fact = 1.39;
1693     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1694     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1695   }
1696   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1697                                      "MatStoreValues_MPISBAIJ",
1698                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1699   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1700                                      "MatRetrieveValues_MPISBAIJ",
1701                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1702   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1703                                      "MatGetDiagonalBlock_MPISBAIJ",
1704                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1705   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1706                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1707                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1708   B->symmetric                  = PETSC_TRUE;
1709   B->structurally_symmetric     = PETSC_TRUE;
1710   B->symmetric_set              = PETSC_TRUE;
1711   B->structurally_symmetric_set = PETSC_TRUE;
1712   PetscFunctionReturn(0);
1713 }
1714 EXTERN_C_END
1715 
1716 /*MC
1717    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1718 
1719    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1720    and MATMPISBAIJ otherwise.
1721 
1722    Options Database Keys:
1723 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1724 
1725   Level: beginner
1726 
1727 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1728 M*/
1729 
1730 EXTERN_C_BEGIN
1731 #undef __FUNCT__
1732 #define __FUNCT__ "MatCreate_SBAIJ"
1733 PetscErrorCode MatCreate_SBAIJ(Mat A)
1734 {
1735   PetscErrorCode ierr;
1736   PetscMPIInt    size;
1737 
1738   PetscFunctionBegin;
1739   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1740   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1741   if (size == 1) {
1742     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1743   } else {
1744     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1745   }
1746   PetscFunctionReturn(0);
1747 }
1748 EXTERN_C_END
1749 
1750 #undef __FUNCT__
1751 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1752 /*@C
1753    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1754    the user should preallocate the matrix storage by setting the parameters
1755    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1756    performance can be increased by more than a factor of 50.
1757 
1758    Collective on Mat
1759 
1760    Input Parameters:
1761 +  A - the matrix
1762 .  bs   - size of blockk
1763 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1764            submatrix  (same for all local rows)
1765 .  d_nnz - array containing the number of block nonzeros in the various block rows
1766            in the upper triangular and diagonal part of the in diagonal portion of the local
1767            (possibly different for each block row) or PETSC_NULL.  You must leave room
1768            for the diagonal entry even if it is zero.
1769 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1770            submatrix (same for all local rows).
1771 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1772            off-diagonal portion of the local submatrix (possibly different for
1773            each block row) or PETSC_NULL.
1774 
1775 
1776    Options Database Keys:
1777 .   -mat_no_unroll - uses code that does not unroll the loops in the
1778                      block calculations (much slower)
1779 .   -mat_block_size - size of the blocks to use
1780 
1781    Notes:
1782 
1783    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1784    than it must be used on all processors that share the object for that argument.
1785 
1786    If the *_nnz parameter is given then the *_nz parameter is ignored
1787 
1788    Storage Information:
1789    For a square global matrix we define each processor's diagonal portion
1790    to be its local rows and the corresponding columns (a square submatrix);
1791    each processor's off-diagonal portion encompasses the remainder of the
1792    local matrix (a rectangular submatrix).
1793 
1794    The user can specify preallocated storage for the diagonal part of
1795    the local submatrix with either d_nz or d_nnz (not both).  Set
1796    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1797    memory allocation.  Likewise, specify preallocated storage for the
1798    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1799 
1800    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1801    the figure below we depict these three local rows and all columns (0-11).
1802 
1803 .vb
1804            0 1 2 3 4 5 6 7 8 9 10 11
1805           -------------------
1806    row 3  |  o o o d d d o o o o o o
1807    row 4  |  o o o d d d o o o o o o
1808    row 5  |  o o o d d d o o o o o o
1809           -------------------
1810 .ve
1811 
1812    Thus, any entries in the d locations are stored in the d (diagonal)
1813    submatrix, and any entries in the o locations are stored in the
1814    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1815    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1816 
1817    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1818    plus the diagonal part of the d matrix,
1819    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1820    In general, for PDE problems in which most nonzeros are near the diagonal,
1821    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1822    or you will get TERRIBLE performance; see the users' manual chapter on
1823    matrices.
1824 
1825    Level: intermediate
1826 
1827 .keywords: matrix, block, aij, compressed row, sparse, parallel
1828 
1829 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1830 @*/
1831 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1832 {
1833   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1834 
1835   PetscFunctionBegin;
1836   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1837   if (f) {
1838     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1839   }
1840   PetscFunctionReturn(0);
1841 }
1842 
1843 #undef __FUNCT__
1844 #define __FUNCT__ "MatCreateMPISBAIJ"
1845 /*@C
1846    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1847    (block compressed row).  For good matrix assembly performance
1848    the user should preallocate the matrix storage by setting the parameters
1849    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1850    performance can be increased by more than a factor of 50.
1851 
1852    Collective on MPI_Comm
1853 
1854    Input Parameters:
1855 +  comm - MPI communicator
1856 .  bs   - size of blockk
1857 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1858            This value should be the same as the local size used in creating the
1859            y vector for the matrix-vector product y = Ax.
1860 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1861            This value should be the same as the local size used in creating the
1862            x vector for the matrix-vector product y = Ax.
1863 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1864 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1865 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1866            submatrix  (same for all local rows)
1867 .  d_nnz - array containing the number of block nonzeros in the various block rows
1868            in the upper triangular portion of the in diagonal portion of the local
1869            (possibly different for each block block row) or PETSC_NULL.
1870            You must leave room for the diagonal entry even if it is zero.
1871 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1872            submatrix (same for all local rows).
1873 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1874            off-diagonal portion of the local submatrix (possibly different for
1875            each block row) or PETSC_NULL.
1876 
1877    Output Parameter:
1878 .  A - the matrix
1879 
1880    Options Database Keys:
1881 .   -mat_no_unroll - uses code that does not unroll the loops in the
1882                      block calculations (much slower)
1883 .   -mat_block_size - size of the blocks to use
1884 .   -mat_mpi - use the parallel matrix data structures even on one processor
1885                (defaults to using SeqBAIJ format on one processor)
1886 
1887    Notes:
1888    The user MUST specify either the local or global matrix dimensions
1889    (possibly both).
1890 
1891    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1892    than it must be used on all processors that share the object for that argument.
1893 
1894    If the *_nnz parameter is given then the *_nz parameter is ignored
1895 
1896    Storage Information:
1897    For a square global matrix we define each processor's diagonal portion
1898    to be its local rows and the corresponding columns (a square submatrix);
1899    each processor's off-diagonal portion encompasses the remainder of the
1900    local matrix (a rectangular submatrix).
1901 
1902    The user can specify preallocated storage for the diagonal part of
1903    the local submatrix with either d_nz or d_nnz (not both).  Set
1904    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1905    memory allocation.  Likewise, specify preallocated storage for the
1906    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1907 
1908    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1909    the figure below we depict these three local rows and all columns (0-11).
1910 
1911 .vb
1912            0 1 2 3 4 5 6 7 8 9 10 11
1913           -------------------
1914    row 3  |  o o o d d d o o o o o o
1915    row 4  |  o o o d d d o o o o o o
1916    row 5  |  o o o d d d o o o o o o
1917           -------------------
1918 .ve
1919 
1920    Thus, any entries in the d locations are stored in the d (diagonal)
1921    submatrix, and any entries in the o locations are stored in the
1922    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1923    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1924 
1925    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1926    plus the diagonal part of the d matrix,
1927    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1928    In general, for PDE problems in which most nonzeros are near the diagonal,
1929    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1930    or you will get TERRIBLE performance; see the users' manual chapter on
1931    matrices.
1932 
1933    Level: intermediate
1934 
1935 .keywords: matrix, block, aij, compressed row, sparse, parallel
1936 
1937 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1938 @*/
1939 
1940 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1941 {
1942   PetscErrorCode ierr;
1943   PetscMPIInt    size;
1944 
1945   PetscFunctionBegin;
1946   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1947   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1948   if (size > 1) {
1949     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1950     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1951   } else {
1952     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1953     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1954   }
1955   PetscFunctionReturn(0);
1956 }
1957 
1958 
1959 #undef __FUNCT__
1960 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1961 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1962 {
1963   Mat            mat;
1964   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1965   PetscErrorCode ierr;
1966   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1967   PetscScalar    *array;
1968 
1969   PetscFunctionBegin;
1970   *newmat       = 0;
1971   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1972   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1973   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1974 
1975   mat->factor       = matin->factor;
1976   mat->preallocated = PETSC_TRUE;
1977   mat->assembled    = PETSC_TRUE;
1978   mat->insertmode   = NOT_SET_VALUES;
1979 
1980   a = (Mat_MPISBAIJ*)mat->data;
1981   mat->bs  = matin->bs;
1982   a->bs2   = oldmat->bs2;
1983   a->mbs   = oldmat->mbs;
1984   a->nbs   = oldmat->nbs;
1985   a->Mbs   = oldmat->Mbs;
1986   a->Nbs   = oldmat->Nbs;
1987 
1988   a->rstart       = oldmat->rstart;
1989   a->rend         = oldmat->rend;
1990   a->cstart       = oldmat->cstart;
1991   a->cend         = oldmat->cend;
1992   a->size         = oldmat->size;
1993   a->rank         = oldmat->rank;
1994   a->donotstash   = oldmat->donotstash;
1995   a->roworiented  = oldmat->roworiented;
1996   a->rowindices   = 0;
1997   a->rowvalues    = 0;
1998   a->getrowactive = PETSC_FALSE;
1999   a->barray       = 0;
2000   a->rstart_bs    = oldmat->rstart_bs;
2001   a->rend_bs      = oldmat->rend_bs;
2002   a->cstart_bs    = oldmat->cstart_bs;
2003   a->cend_bs      = oldmat->cend_bs;
2004 
2005   /* hash table stuff */
2006   a->ht           = 0;
2007   a->hd           = 0;
2008   a->ht_size      = 0;
2009   a->ht_flag      = oldmat->ht_flag;
2010   a->ht_fact      = oldmat->ht_fact;
2011   a->ht_total_ct  = 0;
2012   a->ht_insert_ct = 0;
2013 
2014   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2015   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2016   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2017   if (oldmat->colmap) {
2018 #if defined (PETSC_USE_CTABLE)
2019     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2020 #else
2021     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2022     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2023     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2024 #endif
2025   } else a->colmap = 0;
2026 
2027   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2028     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2029     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2030     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2031   } else a->garray = 0;
2032 
2033   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2034   PetscLogObjectParent(mat,a->lvec);
2035   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2036   PetscLogObjectParent(mat,a->Mvctx);
2037 
2038   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2039   PetscLogObjectParent(mat,a->slvec0);
2040   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2041   PetscLogObjectParent(mat,a->slvec1);
2042 
2043   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2044   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2045   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2046   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2047   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2048   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2049   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2050   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2051   PetscLogObjectParent(mat,a->slvec0);
2052   PetscLogObjectParent(mat,a->slvec1);
2053   PetscLogObjectParent(mat,a->slvec0b);
2054   PetscLogObjectParent(mat,a->slvec1a);
2055   PetscLogObjectParent(mat,a->slvec1b);
2056 
2057   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2058   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2059   a->sMvctx = oldmat->sMvctx;
2060   PetscLogObjectParent(mat,a->sMvctx);
2061 
2062   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2063   PetscLogObjectParent(mat,a->A);
2064   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2065   PetscLogObjectParent(mat,a->B);
2066   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2067   *newmat = mat;
2068   PetscFunctionReturn(0);
2069 }
2070 
2071 #include "petscsys.h"
2072 
2073 #undef __FUNCT__
2074 #define __FUNCT__ "MatLoad_MPISBAIJ"
2075 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2076 {
2077   Mat            A;
2078   PetscErrorCode ierr;
2079   PetscInt       i,nz,j,rstart,rend;
2080   PetscScalar    *vals,*buf;
2081   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2082   MPI_Status     status;
2083   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2084   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2085   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2086   PetscInt       bs=1,Mbs,mbs,extra_rows;
2087   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2088   PetscInt       dcount,kmax,k,nzcount,tmp;
2089   int            fd;
2090 
2091   PetscFunctionBegin;
2092   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2093 
2094   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2095   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2096   if (!rank) {
2097     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2098     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2099     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2100     if (header[3] < 0) {
2101       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2102     }
2103   }
2104 
2105   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2106   M = header[1]; N = header[2];
2107 
2108   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2109 
2110   /*
2111      This code adds extra rows to make sure the number of rows is
2112      divisible by the blocksize
2113   */
2114   Mbs        = M/bs;
2115   extra_rows = bs - M + bs*(Mbs);
2116   if (extra_rows == bs) extra_rows = 0;
2117   else                  Mbs++;
2118   if (extra_rows &&!rank) {
2119     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2120   }
2121 
2122   /* determine ownership of all rows */
2123   mbs        = Mbs/size + ((Mbs % size) > rank);
2124   m          = mbs*bs;
2125   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2126   browners   = rowners + size + 1;
2127   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2128   rowners[0] = 0;
2129   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2130   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2131   rstart = rowners[rank];
2132   rend   = rowners[rank+1];
2133 
2134   /* distribute row lengths to all processors */
2135   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2136   if (!rank) {
2137     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2138     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2139     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2140     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2141     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2142     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2143     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2144   } else {
2145     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2146   }
2147 
2148   if (!rank) {   /* procs[0] */
2149     /* calculate the number of nonzeros on each processor */
2150     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2151     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2152     for (i=0; i<size; i++) {
2153       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2154         procsnz[i] += rowlengths[j];
2155       }
2156     }
2157     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2158 
2159     /* determine max buffer needed and allocate it */
2160     maxnz = 0;
2161     for (i=0; i<size; i++) {
2162       maxnz = PetscMax(maxnz,procsnz[i]);
2163     }
2164     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2165 
2166     /* read in my part of the matrix column indices  */
2167     nz     = procsnz[0];
2168     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2169     mycols = ibuf;
2170     if (size == 1)  nz -= extra_rows;
2171     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2172     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2173 
2174     /* read in every ones (except the last) and ship off */
2175     for (i=1; i<size-1; i++) {
2176       nz   = procsnz[i];
2177       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2178       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2179     }
2180     /* read in the stuff for the last proc */
2181     if (size != 1) {
2182       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2183       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2184       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2185       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2186     }
2187     ierr = PetscFree(cols);CHKERRQ(ierr);
2188   } else {  /* procs[i], i>0 */
2189     /* determine buffer space needed for message */
2190     nz = 0;
2191     for (i=0; i<m; i++) {
2192       nz += locrowlens[i];
2193     }
2194     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2195     mycols = ibuf;
2196     /* receive message of column indices*/
2197     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2198     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2199     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2200   }
2201 
2202   /* loop over local rows, determining number of off diagonal entries */
2203   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2204   odlens   = dlens + (rend-rstart);
2205   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2206   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2207   masked1  = mask    + Mbs;
2208   masked2  = masked1 + Mbs;
2209   rowcount = 0; nzcount = 0;
2210   for (i=0; i<mbs; i++) {
2211     dcount  = 0;
2212     odcount = 0;
2213     for (j=0; j<bs; j++) {
2214       kmax = locrowlens[rowcount];
2215       for (k=0; k<kmax; k++) {
2216         tmp = mycols[nzcount++]/bs; /* block col. index */
2217         if (!mask[tmp]) {
2218           mask[tmp] = 1;
2219           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2220           else masked1[dcount++] = tmp; /* entry in diag portion */
2221         }
2222       }
2223       rowcount++;
2224     }
2225 
2226     dlens[i]  = dcount;  /* d_nzz[i] */
2227     odlens[i] = odcount; /* o_nzz[i] */
2228 
2229     /* zero out the mask elements we set */
2230     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2231     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2232   }
2233 
2234   /* create our matrix */
2235   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2236   ierr = MatSetType(A,type);CHKERRQ(ierr);
2237   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2238   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2239 
2240   if (!rank) {
2241     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2242     /* read in my part of the matrix numerical values  */
2243     nz = procsnz[0];
2244     vals = buf;
2245     mycols = ibuf;
2246     if (size == 1)  nz -= extra_rows;
2247     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2248     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2249 
2250     /* insert into matrix */
2251     jj      = rstart*bs;
2252     for (i=0; i<m; i++) {
2253       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2254       mycols += locrowlens[i];
2255       vals   += locrowlens[i];
2256       jj++;
2257     }
2258 
2259     /* read in other processors (except the last one) and ship out */
2260     for (i=1; i<size-1; i++) {
2261       nz   = procsnz[i];
2262       vals = buf;
2263       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2264       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2265     }
2266     /* the last proc */
2267     if (size != 1){
2268       nz   = procsnz[i] - extra_rows;
2269       vals = buf;
2270       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2271       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2272       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2273     }
2274     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2275 
2276   } else {
2277     /* receive numeric values */
2278     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2279 
2280     /* receive message of values*/
2281     vals   = buf;
2282     mycols = ibuf;
2283     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2284     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2285     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2286 
2287     /* insert into matrix */
2288     jj      = rstart*bs;
2289     for (i=0; i<m; i++) {
2290       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2291       mycols += locrowlens[i];
2292       vals   += locrowlens[i];
2293       jj++;
2294     }
2295   }
2296 
2297   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2298   ierr = PetscFree(buf);CHKERRQ(ierr);
2299   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2300   ierr = PetscFree(rowners);CHKERRQ(ierr);
2301   ierr = PetscFree(dlens);CHKERRQ(ierr);
2302   ierr = PetscFree(mask);CHKERRQ(ierr);
2303   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2304   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305   *newmat = A;
2306   PetscFunctionReturn(0);
2307 }
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2311 /*XXXXX@
2312    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2313 
2314    Input Parameters:
2315 .  mat  - the matrix
2316 .  fact - factor
2317 
2318    Collective on Mat
2319 
2320    Level: advanced
2321 
2322   Notes:
2323    This can also be set by the command line option: -mat_use_hash_table fact
2324 
2325 .keywords: matrix, hashtable, factor, HT
2326 
2327 .seealso: MatSetOption()
2328 @XXXXX*/
2329 
2330 
2331 #undef __FUNCT__
2332 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2333 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2334 {
2335   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2336   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2337   PetscReal      atmp;
2338   PetscReal      *work,*svalues,*rvalues;
2339   PetscErrorCode ierr;
2340   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2341   PetscMPIInt    rank,size;
2342   PetscInt       *rowners_bs,dest,count,source;
2343   PetscScalar    *va;
2344   MatScalar      *ba;
2345   MPI_Status     stat;
2346 
2347   PetscFunctionBegin;
2348   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2349   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2350 
2351   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2352   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2353 
2354   bs   = A->bs;
2355   mbs  = a->mbs;
2356   Mbs  = a->Mbs;
2357   ba   = b->a;
2358   bi   = b->i;
2359   bj   = b->j;
2360 
2361   /* find ownerships */
2362   rowners_bs = a->rowners_bs;
2363 
2364   /* each proc creates an array to be distributed */
2365   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2366   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2367 
2368   /* row_max for B */
2369   if (rank != size-1){
2370     for (i=0; i<mbs; i++) {
2371       ncols = bi[1] - bi[0]; bi++;
2372       brow  = bs*i;
2373       for (j=0; j<ncols; j++){
2374         bcol = bs*(*bj);
2375         for (kcol=0; kcol<bs; kcol++){
2376           col = bcol + kcol;                 /* local col index */
2377           col += rowners_bs[rank+1];      /* global col index */
2378           for (krow=0; krow<bs; krow++){
2379             atmp = PetscAbsScalar(*ba); ba++;
2380             row = brow + krow;    /* local row index */
2381             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2382             if (work[col] < atmp) work[col] = atmp;
2383           }
2384         }
2385         bj++;
2386       }
2387     }
2388 
2389     /* send values to its owners */
2390     for (dest=rank+1; dest<size; dest++){
2391       svalues = work + rowners_bs[dest];
2392       count   = rowners_bs[dest+1]-rowners_bs[dest];
2393       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2394     }
2395   }
2396 
2397   /* receive values */
2398   if (rank){
2399     rvalues = work;
2400     count   = rowners_bs[rank+1]-rowners_bs[rank];
2401     for (source=0; source<rank; source++){
2402       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2403       /* process values */
2404       for (i=0; i<count; i++){
2405         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2406       }
2407     }
2408   }
2409 
2410   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2411   ierr = PetscFree(work);CHKERRQ(ierr);
2412   PetscFunctionReturn(0);
2413 }
2414 
2415 #undef __FUNCT__
2416 #define __FUNCT__ "MatRelax_MPISBAIJ"
2417 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2418 {
2419   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2420   PetscErrorCode ierr;
2421   PetscInt       mbs=mat->mbs,bs=matin->bs;
2422   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2423   Vec            bb1;
2424 
2425   PetscFunctionBegin;
2426   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2427   if (bs > 1)
2428     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2429 
2430   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2431     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2432       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2433       its--;
2434     }
2435 
2436     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2437     while (its--){
2438 
2439       /* lower triangular part: slvec0b = - B^T*xx */
2440       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2441 
2442       /* copy xx into slvec0a */
2443       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2444       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2445       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2446       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2447 
2448       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2449 
2450       /* copy bb into slvec1a */
2451       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2452       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2453       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2454       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2455 
2456       /* set slvec1b = 0 */
2457       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2458 
2459       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2460       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2461       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2462       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2463 
2464       /* upper triangular part: bb1 = bb1 - B*x */
2465       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2466 
2467       /* local diagonal sweep */
2468       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2469     }
2470     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2471   } else {
2472     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2473   }
2474   PetscFunctionReturn(0);
2475 }
2476 
2477 #undef __FUNCT__
2478 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2479 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2480 {
2481   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2482   PetscErrorCode ierr;
2483   PetscScalar    mone=-1.0;
2484   Vec            lvec1,bb1;
2485 
2486   PetscFunctionBegin;
2487   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2488   if (matin->bs > 1)
2489     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2490 
2491   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2492     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2493       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2494       its--;
2495     }
2496 
2497     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2498     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2499     while (its--){
2500       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2501 
2502       /* lower diagonal part: bb1 = bb - B^T*xx */
2503       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2504       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2505 
2506       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2507       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2508       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2509 
2510       /* upper diagonal part: bb1 = bb1 - B*x */
2511       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2512       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2513 
2514       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2515 
2516       /* diagonal sweep */
2517       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2518     }
2519     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2520     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2521   } else {
2522     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2523   }
2524   PetscFunctionReturn(0);
2525 }
2526 
2527