xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 0be8e73acc056e1817fd33db9a767c1d1a87daa9)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (PetscInt*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (PetscInt*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #endif
279 
280 /* Only add/insert a(i,j) with i<=j (blocks).
281    Any a(i,j) with i>j input by user is ingored.
282 */
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
285 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
288   MatScalar      value;
289   PetscTruth     roworiented = baij->roworiented;
290   PetscErrorCode ierr;
291   PetscInt       i,j,row,col;
292   PetscInt       rstart_orig=baij->rstart_bs;
293   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
294   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
295 
296   /* Some Variables required in the macro */
297   Mat            A = baij->A;
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
299   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
300   MatScalar      *aa=a->a;
301 
302   Mat            B = baij->B;
303   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
304   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
305   MatScalar     *ba=b->a;
306 
307   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
308   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
309   MatScalar     *ap,*bap;
310 
311   /* for stash */
312   PetscInt      n_loc, *in_loc=0;
313   MatScalar     *v_loc=0;
314 
315   PetscFunctionBegin;
316 
317   if(!baij->donotstash){
318     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
319     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
320   }
321 
322   for (i=0; i<m; i++) {
323     if (im[i] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
326 #endif
327     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
328       row = im[i] - rstart_orig;              /* local row index */
329       for (j=0; j<n; j++) {
330         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
331         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
332           col = in[j] - cstart_orig;          /* local col index */
333           brow = row/bs; bcol = col/bs;
334           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
335           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
336           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
337           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
338         } else if (in[j] < 0) continue;
339 #if defined(PETSC_USE_DEBUG)
340         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
341 #endif
342         else {  /* off-diag entry (B) */
343           if (mat->was_assembled) {
344             if (!baij->colmap) {
345               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
346             }
347 #if defined (PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
349             col  = col - 1;
350 #else
351             col = baij->colmap[in[j]/bs] - 1;
352 #endif
353             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
354               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
355               col =  in[j];
356               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
357               B = baij->B;
358               b = (Mat_SeqBAIJ*)(B)->data;
359               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
360               ba=b->a;
361             } else col += in[j]%bs;
362           } else col = in[j];
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         }
367       }
368     } else {  /* off processor entry */
369       if (!baij->donotstash) {
370         n_loc = 0;
371         for (j=0; j<n; j++){
372           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
373           in_loc[n_loc] = in[j];
374           if (roworiented) {
375             v_loc[n_loc] = v[i*n+j];
376           } else {
377             v_loc[n_loc] = v[j*m+i];
378           }
379           n_loc++;
380         }
381         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
382       }
383     }
384   }
385 
386   if(!baij->donotstash){
387     ierr = PetscFree(in_loc);CHKERRQ(ierr);
388     ierr = PetscFree(v_loc);CHKERRQ(ierr);
389   }
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
395 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
396 {
397   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
398   const MatScalar *value;
399   MatScalar       *barray=baij->barray;
400   PetscTruth      roworiented = baij->roworiented;
401   PetscErrorCode  ierr;
402   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
403   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
404   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
405 
406   PetscFunctionBegin;
407   if(!barray) {
408     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
409     baij->barray = barray;
410   }
411 
412   if (roworiented) {
413     stepval = (n-1)*bs;
414   } else {
415     stepval = (m-1)*bs;
416   }
417   for (i=0; i<m; i++) {
418     if (im[i] < 0) continue;
419 #if defined(PETSC_USE_DEBUG)
420     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
421 #endif
422     if (im[i] >= rstart && im[i] < rend) {
423       row = im[i] - rstart;
424       for (j=0; j<n; j++) {
425         /* If NumCol = 1 then a copy is not required */
426         if ((roworiented) && (n == 1)) {
427           barray = (MatScalar*) v + i*bs2;
428         } else if((!roworiented) && (m == 1)) {
429           barray = (MatScalar*) v + j*bs2;
430         } else { /* Here a copy is required */
431           if (roworiented) {
432             value = v + i*(stepval+bs)*bs + j*bs;
433           } else {
434             value = v + j*(stepval+bs)*bs + i*bs;
435           }
436           for (ii=0; ii<bs; ii++,value+=stepval) {
437             for (jj=0; jj<bs; jj++) {
438               *barray++  = *value++;
439             }
440           }
441           barray -=bs2;
442         }
443 
444         if (in[j] >= cstart && in[j] < cend){
445           col  = in[j] - cstart;
446           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
447         }
448         else if (in[j] < 0) continue;
449 #if defined(PETSC_USE_DEBUG)
450         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
451 #endif
452         else {
453           if (mat->was_assembled) {
454             if (!baij->colmap) {
455               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
456             }
457 
458 #if defined(PETSC_USE_DEBUG)
459 #if defined (PETSC_USE_CTABLE)
460             { PetscInt data;
461               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
462               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463             }
464 #else
465             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
466 #endif
467 #endif
468 #if defined (PETSC_USE_CTABLE)
469 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
470             col  = (col - 1)/bs;
471 #else
472             col = (baij->colmap[in[j]] - 1)/bs;
473 #endif
474             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
475               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
476               col =  in[j];
477             }
478           }
479           else col = in[j];
480           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482       }
483     } else {
484       if (!baij->donotstash) {
485         if (roworiented) {
486           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487         } else {
488           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatGetValues_MPISBAIJ"
498 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
499 {
500   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
501   PetscErrorCode ierr;
502   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
503   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
504 
505   PetscFunctionBegin;
506   for (i=0; i<m; i++) {
507     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
508     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
509     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
510       row = idxm[i] - bsrstart;
511       for (j=0; j<n; j++) {
512         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
513         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
514         if (idxn[j] >= bscstart && idxn[j] < bscend){
515           col = idxn[j] - bscstart;
516           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
517         } else {
518           if (!baij->colmap) {
519             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
520           }
521 #if defined (PETSC_USE_CTABLE)
522           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
523           data --;
524 #else
525           data = baij->colmap[idxn[j]/bs]-1;
526 #endif
527           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
528           else {
529             col  = data + idxn[j]%bs;
530             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
531           }
532         }
533       }
534     } else {
535       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
536     }
537   }
538  PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatNorm_MPISBAIJ"
543 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
544 {
545   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
546   PetscErrorCode ierr;
547   PetscReal      sum[2],*lnorm2;
548 
549   PetscFunctionBegin;
550   if (baij->size == 1) {
551     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
552   } else {
553     if (type == NORM_FROBENIUS) {
554       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
555       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
556       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
557       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
558       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
559       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
560       *norm = sqrt(sum[0] + 2*sum[1]);
561       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
562     } else {
563       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
564     }
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
571 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
572 {
573   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
574   PetscErrorCode ierr;
575   PetscInt       nstash,reallocs;
576   InsertMode     addv;
577 
578   PetscFunctionBegin;
579   if (baij->donotstash) {
580     PetscFunctionReturn(0);
581   }
582 
583   /* make sure all processors are either in INSERTMODE or ADDMODE */
584   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
585   if (addv == (ADD_VALUES|INSERT_VALUES)) {
586     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
587   }
588   mat->insertmode = addv; /* in case this processor had no cache */
589 
590   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
591   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
592   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
593   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
594   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
595   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
601 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
602 {
603   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
604   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
605   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
606   PetscErrorCode ierr;
607   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
608   PetscInt       *row,*col,other_disassembled;
609   PetscMPIInt    n;
610   PetscTruth     r1,r2,r3;
611   MatScalar      *val;
612   InsertMode     addv = mat->insertmode;
613 
614   PetscFunctionBegin;
615 
616   if (!baij->donotstash) {
617     while (1) {
618       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
619       if (!flg) break;
620 
621       for (i=0; i<n;) {
622         /* Now identify the consecutive vals belonging to the same row */
623         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
624         if (j < n) ncols = j-i;
625         else       ncols = n-i;
626         /* Now assemble all these values with a single function call */
627         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
628         i = j;
629       }
630     }
631     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
632     /* Now process the block-stash. Since the values are stashed column-oriented,
633        set the roworiented flag to column oriented, and after MatSetValues()
634        restore the original flags */
635     r1 = baij->roworiented;
636     r2 = a->roworiented;
637     r3 = b->roworiented;
638     baij->roworiented = PETSC_FALSE;
639     a->roworiented    = PETSC_FALSE;
640     b->roworiented    = PETSC_FALSE;
641     while (1) {
642       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
643       if (!flg) break;
644 
645       for (i=0; i<n;) {
646         /* Now identify the consecutive vals belonging to the same row */
647         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
648         if (j < n) ncols = j-i;
649         else       ncols = n-i;
650         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
651         i = j;
652       }
653     }
654     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
655     baij->roworiented = r1;
656     a->roworiented    = r2;
657     b->roworiented    = r3;
658   }
659 
660   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
661   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
662 
663   /* determine if any processor has disassembled, if so we must
664      also disassemble ourselfs, in order that we may reassemble. */
665   /*
666      if nonzero structure of submatrix B cannot change then we know that
667      no processor disassembled thus we can skip this stuff
668   */
669   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
670     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
671     if (mat->was_assembled && !other_disassembled) {
672       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
673     }
674   }
675 
676   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
677     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
678   }
679   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
680   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
681 
682   if (baij->rowvalues) {
683     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
684     baij->rowvalues = 0;
685   }
686 
687   PetscFunctionReturn(0);
688 }
689 
690 #undef __FUNCT__
691 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
692 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
693 {
694   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
695   PetscErrorCode    ierr;
696   PetscInt          bs = mat->bs;
697   PetscMPIInt       size = baij->size,rank = baij->rank;
698   PetscTruth        iascii,isdraw;
699   PetscViewer       sviewer;
700   PetscViewerFormat format;
701 
702   PetscFunctionBegin;
703   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
704   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
705   if (iascii) {
706     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
707     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
708       MatInfo info;
709       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
710       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
711       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
712               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
713               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
714       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
715       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
716       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
717       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
718       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
719       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
720       PetscFunctionReturn(0);
721     } else if (format == PETSC_VIEWER_ASCII_INFO) {
722       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
723       PetscFunctionReturn(0);
724     }
725   }
726 
727   if (isdraw) {
728     PetscDraw       draw;
729     PetscTruth isnull;
730     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
731     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
732   }
733 
734   if (size == 1) {
735     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
736     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
737   } else {
738     /* assemble the entire matrix onto first processor. */
739     Mat         A;
740     Mat_SeqSBAIJ *Aloc;
741     Mat_SeqBAIJ *Bloc;
742     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
743     MatScalar   *a;
744 
745     /* Should this be the same type as mat? */
746     if (!rank) {
747       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
748     } else {
749       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
750     }
751     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
752     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
753     PetscLogObjectParent(mat,A);
754 
755     /* copy over the A part */
756     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
757     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
758     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
759 
760     for (i=0; i<mbs; i++) {
761       rvals[0] = bs*(baij->rstart + i);
762       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
763       for (j=ai[i]; j<ai[i+1]; j++) {
764         col = (baij->cstart+aj[j])*bs;
765         for (k=0; k<bs; k++) {
766           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
767           col++; a += bs;
768         }
769       }
770     }
771     /* copy over the B part */
772     Bloc = (Mat_SeqBAIJ*)baij->B->data;
773     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
774     for (i=0; i<mbs; i++) {
775       rvals[0] = bs*(baij->rstart + i);
776       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
777       for (j=ai[i]; j<ai[i+1]; j++) {
778         col = baij->garray[aj[j]]*bs;
779         for (k=0; k<bs; k++) {
780           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
781           col++; a += bs;
782         }
783       }
784     }
785     ierr = PetscFree(rvals);CHKERRQ(ierr);
786     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
787     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
788     /*
789        Everyone has to call to draw the matrix since the graphics waits are
790        synchronized across all processors that share the PetscDraw object
791     */
792     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
793     if (!rank) {
794       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
795       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
796     }
797     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
798     ierr = MatDestroy(A);CHKERRQ(ierr);
799   }
800   PetscFunctionReturn(0);
801 }
802 
803 #undef __FUNCT__
804 #define __FUNCT__ "MatView_MPISBAIJ"
805 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
806 {
807   PetscErrorCode ierr;
808   PetscTruth     iascii,isdraw,issocket,isbinary;
809 
810   PetscFunctionBegin;
811   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
812   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
814   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
815   if (iascii || isdraw || issocket || isbinary) {
816     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
817   } else {
818     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
819   }
820   PetscFunctionReturn(0);
821 }
822 
823 #undef __FUNCT__
824 #define __FUNCT__ "MatDestroy_MPISBAIJ"
825 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
826 {
827   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
828   PetscErrorCode ierr;
829 
830   PetscFunctionBegin;
831 #if defined(PETSC_USE_LOG)
832   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
833 #endif
834   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
835   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
836   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
837   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
838   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
839 #if defined (PETSC_USE_CTABLE)
840   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
841 #else
842   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
843 #endif
844   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
845   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
846   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
847   if (baij->slvec0) {
848     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
849     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
850   }
851   if (baij->slvec1) {
852     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
853     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
854     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
855   }
856   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
857   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
858   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
859   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
860 #if defined(PETSC_USE_MAT_SINGLE)
861   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
862 #endif
863   ierr = PetscFree(baij);CHKERRQ(ierr);
864 
865   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
866   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
867   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
868   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
869   PetscFunctionReturn(0);
870 }
871 
872 #undef __FUNCT__
873 #define __FUNCT__ "MatMult_MPISBAIJ"
874 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
875 {
876   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
877   PetscErrorCode ierr;
878   PetscInt       nt,mbs=a->mbs,bs=A->bs;
879   PetscScalar    *x,*from,zero=0.0;
880 
881   PetscFunctionBegin;
882   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
883   if (nt != A->n) {
884     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
885   }
886   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
887   if (nt != A->m) {
888     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
889   }
890 
891   /* diagonal part */
892   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
893   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
894 
895   /* subdiagonal part */
896   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
897 
898   /* copy x into the vec slvec0 */
899   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
900   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
901   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
902   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
903 
904   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
905   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
906   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
907 
908   /* supperdiagonal part */
909   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
910 
911   PetscFunctionReturn(0);
912 }
913 
914 #undef __FUNCT__
915 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
916 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
917 {
918   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
919   PetscErrorCode ierr;
920   PetscInt       nt;
921 
922   PetscFunctionBegin;
923   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
924   if (nt != A->n) {
925     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
926   }
927   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
928   if (nt != A->m) {
929     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
930   }
931 
932   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
933   /* do diagonal part */
934   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
935   /* do supperdiagonal part */
936   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
937   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
938   /* do subdiagonal part */
939   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
940   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
941   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
942 
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
948 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
949 {
950   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
951   PetscErrorCode ierr;
952   PetscInt       mbs=a->mbs,bs=A->bs;
953   PetscScalar    *x,*from,zero=0.0;
954 
955   PetscFunctionBegin;
956   /*
957   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
958   PetscSynchronizedFlush(A->comm);
959   */
960   /* diagonal part */
961   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
962   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
963 
964   /* subdiagonal part */
965   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
966 
967   /* copy x into the vec slvec0 */
968   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
969   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
970   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
971   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
972 
973   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
974   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
975   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
976 
977   /* supperdiagonal part */
978   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
979 
980   PetscFunctionReturn(0);
981 }
982 
983 #undef __FUNCT__
984 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
985 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
986 {
987   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
988   PetscErrorCode ierr;
989 
990   PetscFunctionBegin;
991   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
992   /* do diagonal part */
993   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
994   /* do supperdiagonal part */
995   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
996   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
997 
998   /* do subdiagonal part */
999   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1000   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1001   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1002 
1003   PetscFunctionReturn(0);
1004 }
1005 
1006 #undef __FUNCT__
1007 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1008 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1009 {
1010   PetscErrorCode ierr;
1011 
1012   PetscFunctionBegin;
1013   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1019 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1020 {
1021   PetscErrorCode ierr;
1022 
1023   PetscFunctionBegin;
1024   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1025   PetscFunctionReturn(0);
1026 }
1027 
1028 /*
1029   This only works correctly for square matrices where the subblock A->A is the
1030    diagonal block
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1034 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1035 {
1036   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1037   PetscErrorCode ierr;
1038 
1039   PetscFunctionBegin;
1040   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1041   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatScale_MPISBAIJ"
1047 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1048 {
1049   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1050   PetscErrorCode ierr;
1051 
1052   PetscFunctionBegin;
1053   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1054   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1060 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1061 {
1062   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1063   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1064   PetscErrorCode ierr;
1065   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1066   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1067   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1068 
1069   PetscFunctionBegin;
1070   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1071   mat->getrowactive = PETSC_TRUE;
1072 
1073   if (!mat->rowvalues && (idx || v)) {
1074     /*
1075         allocate enough space to hold information from the longest row.
1076     */
1077     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1078     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1079     PetscInt     max = 1,mbs = mat->mbs,tmp;
1080     for (i=0; i<mbs; i++) {
1081       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1082       if (max < tmp) { max = tmp; }
1083     }
1084     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1085     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1086   }
1087 
1088   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1089   lrow = row - brstart;  /* local row index */
1090 
1091   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1092   if (!v)   {pvA = 0; pvB = 0;}
1093   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1094   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1095   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1096   nztot = nzA + nzB;
1097 
1098   cmap  = mat->garray;
1099   if (v  || idx) {
1100     if (nztot) {
1101       /* Sort by increasing column numbers, assuming A and B already sorted */
1102       PetscInt imark = -1;
1103       if (v) {
1104         *v = v_p = mat->rowvalues;
1105         for (i=0; i<nzB; i++) {
1106           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1107           else break;
1108         }
1109         imark = i;
1110         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1111         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1112       }
1113       if (idx) {
1114         *idx = idx_p = mat->rowindices;
1115         if (imark > -1) {
1116           for (i=0; i<imark; i++) {
1117             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1118           }
1119         } else {
1120           for (i=0; i<nzB; i++) {
1121             if (cmap[cworkB[i]/bs] < cstart)
1122               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1123             else break;
1124           }
1125           imark = i;
1126         }
1127         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1128         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1129       }
1130     } else {
1131       if (idx) *idx = 0;
1132       if (v)   *v   = 0;
1133     }
1134   }
1135   *nz = nztot;
1136   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1137   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1138   PetscFunctionReturn(0);
1139 }
1140 
1141 #undef __FUNCT__
1142 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1143 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1144 {
1145   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1146 
1147   PetscFunctionBegin;
1148   if (baij->getrowactive == PETSC_FALSE) {
1149     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1150   }
1151   baij->getrowactive = PETSC_FALSE;
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1157 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1158 {
1159   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1164   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1170 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1171 {
1172   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1173   Mat            A = a->A,B = a->B;
1174   PetscErrorCode ierr;
1175   PetscReal      isend[5],irecv[5];
1176 
1177   PetscFunctionBegin;
1178   info->block_size     = (PetscReal)matin->bs;
1179   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1180   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1181   isend[3] = info->memory;  isend[4] = info->mallocs;
1182   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1183   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1184   isend[3] += info->memory;  isend[4] += info->mallocs;
1185   if (flag == MAT_LOCAL) {
1186     info->nz_used      = isend[0];
1187     info->nz_allocated = isend[1];
1188     info->nz_unneeded  = isend[2];
1189     info->memory       = isend[3];
1190     info->mallocs      = isend[4];
1191   } else if (flag == MAT_GLOBAL_MAX) {
1192     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1193     info->nz_used      = irecv[0];
1194     info->nz_allocated = irecv[1];
1195     info->nz_unneeded  = irecv[2];
1196     info->memory       = irecv[3];
1197     info->mallocs      = irecv[4];
1198   } else if (flag == MAT_GLOBAL_SUM) {
1199     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1200     info->nz_used      = irecv[0];
1201     info->nz_allocated = irecv[1];
1202     info->nz_unneeded  = irecv[2];
1203     info->memory       = irecv[3];
1204     info->mallocs      = irecv[4];
1205   } else {
1206     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1207   }
1208   info->rows_global       = (PetscReal)A->M;
1209   info->columns_global    = (PetscReal)A->N;
1210   info->rows_local        = (PetscReal)A->m;
1211   info->columns_local     = (PetscReal)A->N;
1212   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1213   info->fill_ratio_needed = 0;
1214   info->factor_mallocs    = 0;
1215   PetscFunctionReturn(0);
1216 }
1217 
1218 #undef __FUNCT__
1219 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1220 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1221 {
1222   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1223   PetscErrorCode ierr;
1224 
1225   PetscFunctionBegin;
1226   switch (op) {
1227   case MAT_NO_NEW_NONZERO_LOCATIONS:
1228   case MAT_YES_NEW_NONZERO_LOCATIONS:
1229   case MAT_COLUMNS_UNSORTED:
1230   case MAT_COLUMNS_SORTED:
1231   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1232   case MAT_KEEP_ZEROED_ROWS:
1233   case MAT_NEW_NONZERO_LOCATION_ERR:
1234     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1235     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1236     break;
1237   case MAT_ROW_ORIENTED:
1238     a->roworiented = PETSC_TRUE;
1239     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1240     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1241     break;
1242   case MAT_ROWS_SORTED:
1243   case MAT_ROWS_UNSORTED:
1244   case MAT_YES_NEW_DIAGONALS:
1245     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1246     break;
1247   case MAT_COLUMN_ORIENTED:
1248     a->roworiented = PETSC_FALSE;
1249     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1250     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1251     break;
1252   case MAT_IGNORE_OFF_PROC_ENTRIES:
1253     a->donotstash = PETSC_TRUE;
1254     break;
1255   case MAT_NO_NEW_DIAGONALS:
1256     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1257   case MAT_USE_HASH_TABLE:
1258     a->ht_flag = PETSC_TRUE;
1259     break;
1260   case MAT_NOT_SYMMETRIC:
1261   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1262   case MAT_HERMITIAN:
1263     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1264   case MAT_SYMMETRIC:
1265   case MAT_STRUCTURALLY_SYMMETRIC:
1266   case MAT_NOT_HERMITIAN:
1267   case MAT_SYMMETRY_ETERNAL:
1268   case MAT_NOT_SYMMETRY_ETERNAL:
1269     break;
1270   default:
1271     SETERRQ(PETSC_ERR_SUP,"unknown option");
1272   }
1273   PetscFunctionReturn(0);
1274 }
1275 
1276 #undef __FUNCT__
1277 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1278 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1279 {
1280   PetscErrorCode ierr;
1281   PetscFunctionBegin;
1282   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1283   PetscFunctionReturn(0);
1284 }
1285 
1286 #undef __FUNCT__
1287 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1288 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1289 {
1290   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1291   Mat            a = baij->A,b = baij->B;
1292   PetscErrorCode ierr;
1293   PetscInt       s1,s2,s3;
1294 
1295   PetscFunctionBegin;
1296   if (ll != rr) {
1297     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1298   }
1299   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1300   if (rr) {
1301     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1302     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1303     /* Overlap communication with computation. */
1304     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1305     /*} if (ll) { */
1306     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1307     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1308     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1309     /* } */
1310   /* scale  the diagonal block */
1311   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1312 
1313   /* if (rr) { */
1314     /* Do a scatter end and then right scale the off-diagonal block */
1315     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1316     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1317   }
1318 
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1324 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1325 {
1326   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1327   MPI_Comm          comm = A->comm;
1328   static PetscTruth called = PETSC_FALSE;
1329   PetscErrorCode    ierr;
1330 
1331   PetscFunctionBegin;
1332   if (!a->rank) {
1333     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1334   }
1335   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1336   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1337   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1338   PetscFunctionReturn(0);
1339 }
1340 
1341 #undef __FUNCT__
1342 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1343 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1344 {
1345   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1346   PetscErrorCode ierr;
1347 
1348   PetscFunctionBegin;
1349   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1350   PetscFunctionReturn(0);
1351 }
1352 
1353 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatEqual_MPISBAIJ"
1357 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1358 {
1359   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1360   Mat            a,b,c,d;
1361   PetscTruth     flg;
1362   PetscErrorCode ierr;
1363 
1364   PetscFunctionBegin;
1365   a = matA->A; b = matA->B;
1366   c = matB->A; d = matB->B;
1367 
1368   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1369   if (flg == PETSC_TRUE) {
1370     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1371   }
1372   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1378 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1379 {
1380   PetscErrorCode ierr;
1381 
1382   PetscFunctionBegin;
1383   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1384   PetscFunctionReturn(0);
1385 }
1386 
1387 #undef __FUNCT__
1388 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1389 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1390 {
1391   PetscErrorCode ierr;
1392   PetscInt       i;
1393   PetscTruth     flg;
1394 
1395   PetscFunctionBegin;
1396   for (i=0; i<n; i++) {
1397     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1398     if (!flg) {
1399       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1400     }
1401   }
1402   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1403   PetscFunctionReturn(0);
1404 }
1405 
1406 
1407 /* -------------------------------------------------------------------*/
1408 static struct _MatOps MatOps_Values = {
1409        MatSetValues_MPISBAIJ,
1410        MatGetRow_MPISBAIJ,
1411        MatRestoreRow_MPISBAIJ,
1412        MatMult_MPISBAIJ,
1413 /* 4*/ MatMultAdd_MPISBAIJ,
1414        MatMultTranspose_MPISBAIJ,
1415        MatMultTransposeAdd_MPISBAIJ,
1416        0,
1417        0,
1418        0,
1419 /*10*/ 0,
1420        0,
1421        0,
1422        MatRelax_MPISBAIJ,
1423        MatTranspose_MPISBAIJ,
1424 /*15*/ MatGetInfo_MPISBAIJ,
1425        MatEqual_MPISBAIJ,
1426        MatGetDiagonal_MPISBAIJ,
1427        MatDiagonalScale_MPISBAIJ,
1428        MatNorm_MPISBAIJ,
1429 /*20*/ MatAssemblyBegin_MPISBAIJ,
1430        MatAssemblyEnd_MPISBAIJ,
1431        0,
1432        MatSetOption_MPISBAIJ,
1433        MatZeroEntries_MPISBAIJ,
1434 /*25*/ 0,
1435        0,
1436        0,
1437        0,
1438        0,
1439 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1440        0,
1441        0,
1442        0,
1443        0,
1444 /*35*/ MatDuplicate_MPISBAIJ,
1445        0,
1446        0,
1447        0,
1448        0,
1449 /*40*/ 0,
1450        MatGetSubMatrices_MPISBAIJ,
1451        MatIncreaseOverlap_MPISBAIJ,
1452        MatGetValues_MPISBAIJ,
1453        0,
1454 /*45*/ MatPrintHelp_MPISBAIJ,
1455        MatScale_MPISBAIJ,
1456        0,
1457        0,
1458        0,
1459 /*50*/ 0,
1460        0,
1461        0,
1462        0,
1463        0,
1464 /*55*/ 0,
1465        0,
1466        MatSetUnfactored_MPISBAIJ,
1467        0,
1468        MatSetValuesBlocked_MPISBAIJ,
1469 /*60*/ 0,
1470        0,
1471        0,
1472        MatGetPetscMaps_Petsc,
1473        0,
1474 /*65*/ 0,
1475        0,
1476        0,
1477        0,
1478        0,
1479 /*70*/ MatGetRowMax_MPISBAIJ,
1480        0,
1481        0,
1482        0,
1483        0,
1484 /*75*/ 0,
1485        0,
1486        0,
1487        0,
1488        0,
1489 /*80*/ 0,
1490        0,
1491        0,
1492        0,
1493        MatLoad_MPISBAIJ,
1494 /*85*/ 0,
1495        0,
1496        0,
1497        0,
1498        0,
1499 /*90*/ 0,
1500        0,
1501        0,
1502        0,
1503        0,
1504 /*95*/ 0,
1505        0,
1506        0,
1507        0};
1508 
1509 
1510 EXTERN_C_BEGIN
1511 #undef __FUNCT__
1512 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1513 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1514 {
1515   PetscFunctionBegin;
1516   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1517   *iscopy = PETSC_FALSE;
1518   PetscFunctionReturn(0);
1519 }
1520 EXTERN_C_END
1521 
1522 EXTERN_C_BEGIN
1523 #undef __FUNCT__
1524 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1525 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1526 {
1527   Mat_MPISBAIJ   *b;
1528   PetscErrorCode ierr;
1529   PetscInt       i,mbs,Mbs;
1530 
1531   PetscFunctionBegin;
1532   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1533 
1534   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1535   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1536   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1537   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1538   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1539   if (d_nnz) {
1540     for (i=0; i<B->m/bs; i++) {
1541       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1542     }
1543   }
1544   if (o_nnz) {
1545     for (i=0; i<B->m/bs; i++) {
1546       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1547     }
1548   }
1549   B->preallocated = PETSC_TRUE;
1550   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1551   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1552   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1553   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1554 
1555   b   = (Mat_MPISBAIJ*)B->data;
1556   mbs = B->m/bs;
1557   Mbs = B->M/bs;
1558   if (mbs*bs != B->m) {
1559     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1560   }
1561 
1562   B->bs  = bs;
1563   b->bs2 = bs*bs;
1564   b->mbs = mbs;
1565   b->nbs = mbs;
1566   b->Mbs = Mbs;
1567   b->Nbs = Mbs;
1568 
1569   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1570   b->rowners[0]    = 0;
1571   for (i=2; i<=b->size; i++) {
1572     b->rowners[i] += b->rowners[i-1];
1573   }
1574   b->rstart    = b->rowners[b->rank];
1575   b->rend      = b->rowners[b->rank+1];
1576   b->cstart    = b->rstart;
1577   b->cend      = b->rend;
1578   for (i=0; i<=b->size; i++) {
1579     b->rowners_bs[i] = b->rowners[i]*bs;
1580   }
1581   b->rstart_bs = b-> rstart*bs;
1582   b->rend_bs   = b->rend*bs;
1583 
1584   b->cstart_bs = b->cstart*bs;
1585   b->cend_bs   = b->cend*bs;
1586 
1587   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1588   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1589   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1590   PetscLogObjectParent(B,b->A);
1591 
1592   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1593   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1594   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1595   PetscLogObjectParent(B,b->B);
1596 
1597   /* build cache for off array entries formed */
1598   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1599 
1600   PetscFunctionReturn(0);
1601 }
1602 EXTERN_C_END
1603 
1604 /*MC
1605    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1606    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1607 
1608    Options Database Keys:
1609 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1610 
1611   Level: beginner
1612 
1613 .seealso: MatCreateMPISBAIJ
1614 M*/
1615 
1616 EXTERN_C_BEGIN
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatCreate_MPISBAIJ"
1619 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1620 {
1621   Mat_MPISBAIJ   *b;
1622   PetscErrorCode ierr;
1623   PetscTruth     flg;
1624 
1625   PetscFunctionBegin;
1626 
1627   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1628   B->data = (void*)b;
1629   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1630   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1631 
1632   B->ops->destroy    = MatDestroy_MPISBAIJ;
1633   B->ops->view       = MatView_MPISBAIJ;
1634   B->mapping    = 0;
1635   B->factor     = 0;
1636   B->assembled  = PETSC_FALSE;
1637 
1638   B->insertmode = NOT_SET_VALUES;
1639   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1640   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1641 
1642   /* build local table of row and column ownerships */
1643   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1644   b->cowners    = b->rowners + b->size + 2;
1645   b->rowners_bs = b->cowners + b->size + 2;
1646   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1647 
1648   /* build cache for off array entries formed */
1649   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1650   b->donotstash  = PETSC_FALSE;
1651   b->colmap      = PETSC_NULL;
1652   b->garray      = PETSC_NULL;
1653   b->roworiented = PETSC_TRUE;
1654 
1655 #if defined(PETSC_USE_MAT_SINGLE)
1656   /* stuff for MatSetValues_XXX in single precision */
1657   b->setvalueslen     = 0;
1658   b->setvaluescopy    = PETSC_NULL;
1659 #endif
1660 
1661   /* stuff used in block assembly */
1662   b->barray       = 0;
1663 
1664   /* stuff used for matrix vector multiply */
1665   b->lvec         = 0;
1666   b->Mvctx        = 0;
1667   b->slvec0       = 0;
1668   b->slvec0b      = 0;
1669   b->slvec1       = 0;
1670   b->slvec1a      = 0;
1671   b->slvec1b      = 0;
1672   b->sMvctx       = 0;
1673 
1674   /* stuff for MatGetRow() */
1675   b->rowindices   = 0;
1676   b->rowvalues    = 0;
1677   b->getrowactive = PETSC_FALSE;
1678 
1679   /* hash table stuff */
1680   b->ht           = 0;
1681   b->hd           = 0;
1682   b->ht_size      = 0;
1683   b->ht_flag      = PETSC_FALSE;
1684   b->ht_fact      = 0;
1685   b->ht_total_ct  = 0;
1686   b->ht_insert_ct = 0;
1687 
1688   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1689   if (flg) {
1690     PetscReal fact = 1.39;
1691     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1692     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1693     if (fact <= 1.0) fact = 1.39;
1694     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1695     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1696   }
1697   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1698                                      "MatStoreValues_MPISBAIJ",
1699                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1700   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1701                                      "MatRetrieveValues_MPISBAIJ",
1702                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1703   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1704                                      "MatGetDiagonalBlock_MPISBAIJ",
1705                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1706   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1707                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1708                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1709   B->symmetric                  = PETSC_TRUE;
1710   B->structurally_symmetric     = PETSC_TRUE;
1711   B->symmetric_set              = PETSC_TRUE;
1712   B->structurally_symmetric_set = PETSC_TRUE;
1713   PetscFunctionReturn(0);
1714 }
1715 EXTERN_C_END
1716 
1717 /*MC
1718    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1719 
1720    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1721    and MATMPISBAIJ otherwise.
1722 
1723    Options Database Keys:
1724 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1725 
1726   Level: beginner
1727 
1728 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1729 M*/
1730 
1731 EXTERN_C_BEGIN
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatCreate_SBAIJ"
1734 PetscErrorCode MatCreate_SBAIJ(Mat A)
1735 {
1736   PetscErrorCode ierr;
1737   PetscMPIInt    size;
1738 
1739   PetscFunctionBegin;
1740   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1741   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1742   if (size == 1) {
1743     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1744   } else {
1745     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 EXTERN_C_END
1750 
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1753 /*@C
1754    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1755    the user should preallocate the matrix storage by setting the parameters
1756    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1757    performance can be increased by more than a factor of 50.
1758 
1759    Collective on Mat
1760 
1761    Input Parameters:
1762 +  A - the matrix
1763 .  bs   - size of blockk
1764 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1765            submatrix  (same for all local rows)
1766 .  d_nnz - array containing the number of block nonzeros in the various block rows
1767            in the upper triangular and diagonal part of the in diagonal portion of the local
1768            (possibly different for each block row) or PETSC_NULL.  You must leave room
1769            for the diagonal entry even if it is zero.
1770 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1771            submatrix (same for all local rows).
1772 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1773            off-diagonal portion of the local submatrix (possibly different for
1774            each block row) or PETSC_NULL.
1775 
1776 
1777    Options Database Keys:
1778 .   -mat_no_unroll - uses code that does not unroll the loops in the
1779                      block calculations (much slower)
1780 .   -mat_block_size - size of the blocks to use
1781 
1782    Notes:
1783 
1784    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1785    than it must be used on all processors that share the object for that argument.
1786 
1787    If the *_nnz parameter is given then the *_nz parameter is ignored
1788 
1789    Storage Information:
1790    For a square global matrix we define each processor's diagonal portion
1791    to be its local rows and the corresponding columns (a square submatrix);
1792    each processor's off-diagonal portion encompasses the remainder of the
1793    local matrix (a rectangular submatrix).
1794 
1795    The user can specify preallocated storage for the diagonal part of
1796    the local submatrix with either d_nz or d_nnz (not both).  Set
1797    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1798    memory allocation.  Likewise, specify preallocated storage for the
1799    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1800 
1801    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1802    the figure below we depict these three local rows and all columns (0-11).
1803 
1804 .vb
1805            0 1 2 3 4 5 6 7 8 9 10 11
1806           -------------------
1807    row 3  |  o o o d d d o o o o o o
1808    row 4  |  o o o d d d o o o o o o
1809    row 5  |  o o o d d d o o o o o o
1810           -------------------
1811 .ve
1812 
1813    Thus, any entries in the d locations are stored in the d (diagonal)
1814    submatrix, and any entries in the o locations are stored in the
1815    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1816    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1817 
1818    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1819    plus the diagonal part of the d matrix,
1820    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1821    In general, for PDE problems in which most nonzeros are near the diagonal,
1822    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1823    or you will get TERRIBLE performance; see the users' manual chapter on
1824    matrices.
1825 
1826    Level: intermediate
1827 
1828 .keywords: matrix, block, aij, compressed row, sparse, parallel
1829 
1830 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1831 @*/
1832 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1833 {
1834   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1835 
1836   PetscFunctionBegin;
1837   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1838   if (f) {
1839     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1840   }
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatCreateMPISBAIJ"
1846 /*@C
1847    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1848    (block compressed row).  For good matrix assembly performance
1849    the user should preallocate the matrix storage by setting the parameters
1850    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1851    performance can be increased by more than a factor of 50.
1852 
1853    Collective on MPI_Comm
1854 
1855    Input Parameters:
1856 +  comm - MPI communicator
1857 .  bs   - size of blockk
1858 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1859            This value should be the same as the local size used in creating the
1860            y vector for the matrix-vector product y = Ax.
1861 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1862            This value should be the same as the local size used in creating the
1863            x vector for the matrix-vector product y = Ax.
1864 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1865 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1866 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1867            submatrix  (same for all local rows)
1868 .  d_nnz - array containing the number of block nonzeros in the various block rows
1869            in the upper triangular portion of the in diagonal portion of the local
1870            (possibly different for each block block row) or PETSC_NULL.
1871            You must leave room for the diagonal entry even if it is zero.
1872 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1873            submatrix (same for all local rows).
1874 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1875            off-diagonal portion of the local submatrix (possibly different for
1876            each block row) or PETSC_NULL.
1877 
1878    Output Parameter:
1879 .  A - the matrix
1880 
1881    Options Database Keys:
1882 .   -mat_no_unroll - uses code that does not unroll the loops in the
1883                      block calculations (much slower)
1884 .   -mat_block_size - size of the blocks to use
1885 .   -mat_mpi - use the parallel matrix data structures even on one processor
1886                (defaults to using SeqBAIJ format on one processor)
1887 
1888    Notes:
1889    The user MUST specify either the local or global matrix dimensions
1890    (possibly both).
1891 
1892    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1893    than it must be used on all processors that share the object for that argument.
1894 
1895    If the *_nnz parameter is given then the *_nz parameter is ignored
1896 
1897    Storage Information:
1898    For a square global matrix we define each processor's diagonal portion
1899    to be its local rows and the corresponding columns (a square submatrix);
1900    each processor's off-diagonal portion encompasses the remainder of the
1901    local matrix (a rectangular submatrix).
1902 
1903    The user can specify preallocated storage for the diagonal part of
1904    the local submatrix with either d_nz or d_nnz (not both).  Set
1905    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1906    memory allocation.  Likewise, specify preallocated storage for the
1907    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1908 
1909    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1910    the figure below we depict these three local rows and all columns (0-11).
1911 
1912 .vb
1913            0 1 2 3 4 5 6 7 8 9 10 11
1914           -------------------
1915    row 3  |  o o o d d d o o o o o o
1916    row 4  |  o o o d d d o o o o o o
1917    row 5  |  o o o d d d o o o o o o
1918           -------------------
1919 .ve
1920 
1921    Thus, any entries in the d locations are stored in the d (diagonal)
1922    submatrix, and any entries in the o locations are stored in the
1923    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1924    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1925 
1926    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1927    plus the diagonal part of the d matrix,
1928    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1929    In general, for PDE problems in which most nonzeros are near the diagonal,
1930    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1931    or you will get TERRIBLE performance; see the users' manual chapter on
1932    matrices.
1933 
1934    Level: intermediate
1935 
1936 .keywords: matrix, block, aij, compressed row, sparse, parallel
1937 
1938 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1939 @*/
1940 
1941 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1942 {
1943   PetscErrorCode ierr;
1944   PetscMPIInt    size;
1945 
1946   PetscFunctionBegin;
1947   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1949   if (size > 1) {
1950     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1951     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1952   } else {
1953     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1954     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1955   }
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1962 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1963 {
1964   Mat            mat;
1965   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1966   PetscErrorCode ierr;
1967   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1968   PetscScalar    *array;
1969 
1970   PetscFunctionBegin;
1971   *newmat       = 0;
1972   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1973   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1974   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1975 
1976   mat->factor       = matin->factor;
1977   mat->preallocated = PETSC_TRUE;
1978   mat->assembled    = PETSC_TRUE;
1979   mat->insertmode   = NOT_SET_VALUES;
1980 
1981   a = (Mat_MPISBAIJ*)mat->data;
1982   mat->bs  = matin->bs;
1983   a->bs2   = oldmat->bs2;
1984   a->mbs   = oldmat->mbs;
1985   a->nbs   = oldmat->nbs;
1986   a->Mbs   = oldmat->Mbs;
1987   a->Nbs   = oldmat->Nbs;
1988 
1989   a->rstart       = oldmat->rstart;
1990   a->rend         = oldmat->rend;
1991   a->cstart       = oldmat->cstart;
1992   a->cend         = oldmat->cend;
1993   a->size         = oldmat->size;
1994   a->rank         = oldmat->rank;
1995   a->donotstash   = oldmat->donotstash;
1996   a->roworiented  = oldmat->roworiented;
1997   a->rowindices   = 0;
1998   a->rowvalues    = 0;
1999   a->getrowactive = PETSC_FALSE;
2000   a->barray       = 0;
2001   a->rstart_bs    = oldmat->rstart_bs;
2002   a->rend_bs      = oldmat->rend_bs;
2003   a->cstart_bs    = oldmat->cstart_bs;
2004   a->cend_bs      = oldmat->cend_bs;
2005 
2006   /* hash table stuff */
2007   a->ht           = 0;
2008   a->hd           = 0;
2009   a->ht_size      = 0;
2010   a->ht_flag      = oldmat->ht_flag;
2011   a->ht_fact      = oldmat->ht_fact;
2012   a->ht_total_ct  = 0;
2013   a->ht_insert_ct = 0;
2014 
2015   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2016   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2017   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2018   if (oldmat->colmap) {
2019 #if defined (PETSC_USE_CTABLE)
2020     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2021 #else
2022     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2023     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2024     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2025 #endif
2026   } else a->colmap = 0;
2027 
2028   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2029     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2030     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2031     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2032   } else a->garray = 0;
2033 
2034   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2035   PetscLogObjectParent(mat,a->lvec);
2036   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2037   PetscLogObjectParent(mat,a->Mvctx);
2038 
2039   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2040   PetscLogObjectParent(mat,a->slvec0);
2041   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2042   PetscLogObjectParent(mat,a->slvec1);
2043 
2044   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2045   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2046   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2047   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2048   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2049   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2050   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2051   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2052   PetscLogObjectParent(mat,a->slvec0);
2053   PetscLogObjectParent(mat,a->slvec1);
2054   PetscLogObjectParent(mat,a->slvec0b);
2055   PetscLogObjectParent(mat,a->slvec1a);
2056   PetscLogObjectParent(mat,a->slvec1b);
2057 
2058   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2059   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2060   a->sMvctx = oldmat->sMvctx;
2061   PetscLogObjectParent(mat,a->sMvctx);
2062 
2063   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2064   PetscLogObjectParent(mat,a->A);
2065   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2066   PetscLogObjectParent(mat,a->B);
2067   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2068   *newmat = mat;
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #include "petscsys.h"
2073 
2074 #undef __FUNCT__
2075 #define __FUNCT__ "MatLoad_MPISBAIJ"
2076 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2077 {
2078   Mat            A;
2079   PetscErrorCode ierr;
2080   PetscInt       i,nz,j,rstart,rend;
2081   PetscScalar    *vals,*buf;
2082   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2083   MPI_Status     status;
2084   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2085   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2086   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2087   PetscInt       bs=1,Mbs,mbs,extra_rows;
2088   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2089   PetscInt       dcount,kmax,k,nzcount,tmp;
2090   int            fd;
2091 
2092   PetscFunctionBegin;
2093   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2094 
2095   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2096   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2097   if (!rank) {
2098     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2099     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2100     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2101     if (header[3] < 0) {
2102       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2103     }
2104   }
2105 
2106   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2107   M = header[1]; N = header[2];
2108 
2109   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2110 
2111   /*
2112      This code adds extra rows to make sure the number of rows is
2113      divisible by the blocksize
2114   */
2115   Mbs        = M/bs;
2116   extra_rows = bs - M + bs*(Mbs);
2117   if (extra_rows == bs) extra_rows = 0;
2118   else                  Mbs++;
2119   if (extra_rows &&!rank) {
2120     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2121   }
2122 
2123   /* determine ownership of all rows */
2124   mbs        = Mbs/size + ((Mbs % size) > rank);
2125   m          = mbs*bs;
2126   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2127   browners   = rowners + size + 1;
2128   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2129   rowners[0] = 0;
2130   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2131   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2132   rstart = rowners[rank];
2133   rend   = rowners[rank+1];
2134 
2135   /* distribute row lengths to all processors */
2136   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2137   if (!rank) {
2138     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2139     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2140     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2141     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2142     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2143     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2144     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2145   } else {
2146     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2147   }
2148 
2149   if (!rank) {   /* procs[0] */
2150     /* calculate the number of nonzeros on each processor */
2151     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2152     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2153     for (i=0; i<size; i++) {
2154       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2155         procsnz[i] += rowlengths[j];
2156       }
2157     }
2158     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2159 
2160     /* determine max buffer needed and allocate it */
2161     maxnz = 0;
2162     for (i=0; i<size; i++) {
2163       maxnz = PetscMax(maxnz,procsnz[i]);
2164     }
2165     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2166 
2167     /* read in my part of the matrix column indices  */
2168     nz     = procsnz[0];
2169     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2170     mycols = ibuf;
2171     if (size == 1)  nz -= extra_rows;
2172     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2173     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2174 
2175     /* read in every ones (except the last) and ship off */
2176     for (i=1; i<size-1; i++) {
2177       nz   = procsnz[i];
2178       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2179       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2180     }
2181     /* read in the stuff for the last proc */
2182     if (size != 1) {
2183       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2184       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2185       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2186       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2187     }
2188     ierr = PetscFree(cols);CHKERRQ(ierr);
2189   } else {  /* procs[i], i>0 */
2190     /* determine buffer space needed for message */
2191     nz = 0;
2192     for (i=0; i<m; i++) {
2193       nz += locrowlens[i];
2194     }
2195     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2196     mycols = ibuf;
2197     /* receive message of column indices*/
2198     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2199     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2200     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2201   }
2202 
2203   /* loop over local rows, determining number of off diagonal entries */
2204   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2205   odlens   = dlens + (rend-rstart);
2206   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2207   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2208   masked1  = mask    + Mbs;
2209   masked2  = masked1 + Mbs;
2210   rowcount = 0; nzcount = 0;
2211   for (i=0; i<mbs; i++) {
2212     dcount  = 0;
2213     odcount = 0;
2214     for (j=0; j<bs; j++) {
2215       kmax = locrowlens[rowcount];
2216       for (k=0; k<kmax; k++) {
2217         tmp = mycols[nzcount++]/bs; /* block col. index */
2218         if (!mask[tmp]) {
2219           mask[tmp] = 1;
2220           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2221           else masked1[dcount++] = tmp; /* entry in diag portion */
2222         }
2223       }
2224       rowcount++;
2225     }
2226 
2227     dlens[i]  = dcount;  /* d_nzz[i] */
2228     odlens[i] = odcount; /* o_nzz[i] */
2229 
2230     /* zero out the mask elements we set */
2231     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2232     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2233   }
2234 
2235   /* create our matrix */
2236   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2237   ierr = MatSetType(A,type);CHKERRQ(ierr);
2238   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2239   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2240 
2241   if (!rank) {
2242     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2243     /* read in my part of the matrix numerical values  */
2244     nz = procsnz[0];
2245     vals = buf;
2246     mycols = ibuf;
2247     if (size == 1)  nz -= extra_rows;
2248     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2249     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2250 
2251     /* insert into matrix */
2252     jj      = rstart*bs;
2253     for (i=0; i<m; i++) {
2254       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2255       mycols += locrowlens[i];
2256       vals   += locrowlens[i];
2257       jj++;
2258     }
2259 
2260     /* read in other processors (except the last one) and ship out */
2261     for (i=1; i<size-1; i++) {
2262       nz   = procsnz[i];
2263       vals = buf;
2264       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2265       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2266     }
2267     /* the last proc */
2268     if (size != 1){
2269       nz   = procsnz[i] - extra_rows;
2270       vals = buf;
2271       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2272       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2273       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2274     }
2275     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2276 
2277   } else {
2278     /* receive numeric values */
2279     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2280 
2281     /* receive message of values*/
2282     vals   = buf;
2283     mycols = ibuf;
2284     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2285     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2286     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2287 
2288     /* insert into matrix */
2289     jj      = rstart*bs;
2290     for (i=0; i<m; i++) {
2291       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2292       mycols += locrowlens[i];
2293       vals   += locrowlens[i];
2294       jj++;
2295     }
2296   }
2297 
2298   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2299   ierr = PetscFree(buf);CHKERRQ(ierr);
2300   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2301   ierr = PetscFree(rowners);CHKERRQ(ierr);
2302   ierr = PetscFree(dlens);CHKERRQ(ierr);
2303   ierr = PetscFree(mask);CHKERRQ(ierr);
2304   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2305   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2306   *newmat = A;
2307   PetscFunctionReturn(0);
2308 }
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2312 /*XXXXX@
2313    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2314 
2315    Input Parameters:
2316 .  mat  - the matrix
2317 .  fact - factor
2318 
2319    Collective on Mat
2320 
2321    Level: advanced
2322 
2323   Notes:
2324    This can also be set by the command line option: -mat_use_hash_table fact
2325 
2326 .keywords: matrix, hashtable, factor, HT
2327 
2328 .seealso: MatSetOption()
2329 @XXXXX*/
2330 
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2334 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2335 {
2336   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2337   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2338   PetscReal      atmp;
2339   PetscReal      *work,*svalues,*rvalues;
2340   PetscErrorCode ierr;
2341   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2342   PetscMPIInt    rank,size;
2343   PetscInt       *rowners_bs,dest,count,source;
2344   PetscScalar    *va;
2345   MatScalar      *ba;
2346   MPI_Status     stat;
2347 
2348   PetscFunctionBegin;
2349   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351 
2352   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2353   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2354 
2355   bs   = A->bs;
2356   mbs  = a->mbs;
2357   Mbs  = a->Mbs;
2358   ba   = b->a;
2359   bi   = b->i;
2360   bj   = b->j;
2361 
2362   /* find ownerships */
2363   rowners_bs = a->rowners_bs;
2364 
2365   /* each proc creates an array to be distributed */
2366   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2367   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2368 
2369   /* row_max for B */
2370   if (rank != size-1){
2371     for (i=0; i<mbs; i++) {
2372       ncols = bi[1] - bi[0]; bi++;
2373       brow  = bs*i;
2374       for (j=0; j<ncols; j++){
2375         bcol = bs*(*bj);
2376         for (kcol=0; kcol<bs; kcol++){
2377           col = bcol + kcol;                 /* local col index */
2378           col += rowners_bs[rank+1];      /* global col index */
2379           for (krow=0; krow<bs; krow++){
2380             atmp = PetscAbsScalar(*ba); ba++;
2381             row = brow + krow;    /* local row index */
2382             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2383             if (work[col] < atmp) work[col] = atmp;
2384           }
2385         }
2386         bj++;
2387       }
2388     }
2389 
2390     /* send values to its owners */
2391     for (dest=rank+1; dest<size; dest++){
2392       svalues = work + rowners_bs[dest];
2393       count   = rowners_bs[dest+1]-rowners_bs[dest];
2394       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2395     }
2396   }
2397 
2398   /* receive values */
2399   if (rank){
2400     rvalues = work;
2401     count   = rowners_bs[rank+1]-rowners_bs[rank];
2402     for (source=0; source<rank; source++){
2403       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2404       /* process values */
2405       for (i=0; i<count; i++){
2406         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2407       }
2408     }
2409   }
2410 
2411   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2412   ierr = PetscFree(work);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 #undef __FUNCT__
2417 #define __FUNCT__ "MatRelax_MPISBAIJ"
2418 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2419 {
2420   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2421   PetscErrorCode ierr;
2422   PetscInt       mbs=mat->mbs,bs=matin->bs;
2423   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2424   Vec            bb1;
2425 
2426   PetscFunctionBegin;
2427   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2428   if (bs > 1)
2429     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2430 
2431   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2432     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2433       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2434       its--;
2435     }
2436 
2437     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2438     while (its--){
2439 
2440       /* lower triangular part: slvec0b = - B^T*xx */
2441       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2442 
2443       /* copy xx into slvec0a */
2444       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2445       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2446       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2447       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2448 
2449       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2450 
2451       /* copy bb into slvec1a */
2452       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2453       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2454       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2455       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2456 
2457       /* set slvec1b = 0 */
2458       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2459 
2460       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2461       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2462       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2463       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2464 
2465       /* upper triangular part: bb1 = bb1 - B*x */
2466       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2467 
2468       /* local diagonal sweep */
2469       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2470     }
2471     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2472   } else {
2473     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2474   }
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2480 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2481 {
2482   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2483   PetscErrorCode ierr;
2484   PetscScalar    mone=-1.0;
2485   Vec            lvec1,bb1;
2486 
2487   PetscFunctionBegin;
2488   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2489   if (matin->bs > 1)
2490     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2491 
2492   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2493     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2494       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2495       its--;
2496     }
2497 
2498     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2499     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2500     while (its--){
2501       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2502 
2503       /* lower diagonal part: bb1 = bb - B^T*xx */
2504       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2505       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2506 
2507       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2508       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2509       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2510 
2511       /* upper diagonal part: bb1 = bb1 - B*x */
2512       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2513       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2514 
2515       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2516 
2517       /* diagonal sweep */
2518       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2519     }
2520     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2521     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2522   } else {
2523     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2524   }
2525   PetscFunctionReturn(0);
2526 }
2527 
2528