xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a79aaaed103246988dafa581a233ebf306fec680)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (PetscInt*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (PetscInt*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #endif
279 
280 /* Only add/insert a(i,j) with i<=j (blocks).
281    Any a(i,j) with i>j input by user is ingored.
282 */
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
285 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
288   MatScalar      value;
289   PetscTruth     roworiented = baij->roworiented;
290   PetscErrorCode ierr;
291   PetscInt       i,j,row,col;
292   PetscInt       rstart_orig=baij->rstart_bs;
293   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
294   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
295 
296   /* Some Variables required in the macro */
297   Mat            A = baij->A;
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
299   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
300   MatScalar      *aa=a->a;
301 
302   Mat            B = baij->B;
303   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
304   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
305   MatScalar     *ba=b->a;
306 
307   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
308   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
309   MatScalar     *ap,*bap;
310 
311   /* for stash */
312   PetscInt      n_loc, *in_loc=0;
313   MatScalar     *v_loc=0;
314 
315   PetscFunctionBegin;
316 
317   if(!baij->donotstash){
318     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
319     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
320   }
321 
322   for (i=0; i<m; i++) {
323     if (im[i] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
326 #endif
327     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
328       row = im[i] - rstart_orig;              /* local row index */
329       for (j=0; j<n; j++) {
330         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
331         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
332           col = in[j] - cstart_orig;          /* local col index */
333           brow = row/bs; bcol = col/bs;
334           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
335           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
336           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
337           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
338         } else if (in[j] < 0) continue;
339 #if defined(PETSC_USE_DEBUG)
340         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
341 #endif
342         else {  /* off-diag entry (B) */
343           if (mat->was_assembled) {
344             if (!baij->colmap) {
345               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
346             }
347 #if defined (PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
349             col  = col - 1;
350 #else
351             col = baij->colmap[in[j]/bs] - 1;
352 #endif
353             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
354               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
355               col =  in[j];
356               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
357               B = baij->B;
358               b = (Mat_SeqBAIJ*)(B)->data;
359               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
360               ba=b->a;
361             } else col += in[j]%bs;
362           } else col = in[j];
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         }
367       }
368     } else {  /* off processor entry */
369       if (!baij->donotstash) {
370         n_loc = 0;
371         for (j=0; j<n; j++){
372           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
373           in_loc[n_loc] = in[j];
374           if (roworiented) {
375             v_loc[n_loc] = v[i*n+j];
376           } else {
377             v_loc[n_loc] = v[j*m+i];
378           }
379           n_loc++;
380         }
381         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
382       }
383     }
384   }
385 
386   if(!baij->donotstash){
387     ierr = PetscFree(in_loc);CHKERRQ(ierr);
388     ierr = PetscFree(v_loc);CHKERRQ(ierr);
389   }
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
395 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
396 {
397   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
398   const MatScalar *value;
399   MatScalar       *barray=baij->barray;
400   PetscTruth      roworiented = baij->roworiented;
401   PetscErrorCode  ierr;
402   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
403   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
404   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
405 
406   PetscFunctionBegin;
407   if(!barray) {
408     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
409     baij->barray = barray;
410   }
411 
412   if (roworiented) {
413     stepval = (n-1)*bs;
414   } else {
415     stepval = (m-1)*bs;
416   }
417   for (i=0; i<m; i++) {
418     if (im[i] < 0) continue;
419 #if defined(PETSC_USE_DEBUG)
420     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
421 #endif
422     if (im[i] >= rstart && im[i] < rend) {
423       row = im[i] - rstart;
424       for (j=0; j<n; j++) {
425         /* If NumCol = 1 then a copy is not required */
426         if ((roworiented) && (n == 1)) {
427           barray = (MatScalar*) v + i*bs2;
428         } else if((!roworiented) && (m == 1)) {
429           barray = (MatScalar*) v + j*bs2;
430         } else { /* Here a copy is required */
431           if (roworiented) {
432             value = v + i*(stepval+bs)*bs + j*bs;
433           } else {
434             value = v + j*(stepval+bs)*bs + i*bs;
435           }
436           for (ii=0; ii<bs; ii++,value+=stepval) {
437             for (jj=0; jj<bs; jj++) {
438               *barray++  = *value++;
439             }
440           }
441           barray -=bs2;
442         }
443 
444         if (in[j] >= cstart && in[j] < cend){
445           col  = in[j] - cstart;
446           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
447         }
448         else if (in[j] < 0) continue;
449 #if defined(PETSC_USE_DEBUG)
450         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
451 #endif
452         else {
453           if (mat->was_assembled) {
454             if (!baij->colmap) {
455               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
456             }
457 
458 #if defined(PETSC_USE_DEBUG)
459 #if defined (PETSC_USE_CTABLE)
460             { PetscInt data;
461               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
462               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463             }
464 #else
465             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
466 #endif
467 #endif
468 #if defined (PETSC_USE_CTABLE)
469 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
470             col  = (col - 1)/bs;
471 #else
472             col = (baij->colmap[in[j]] - 1)/bs;
473 #endif
474             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
475               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
476               col =  in[j];
477             }
478           }
479           else col = in[j];
480           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482       }
483     } else {
484       if (!baij->donotstash) {
485         if (roworiented) {
486           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487         } else {
488           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatGetValues_MPISBAIJ"
498 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
499 {
500   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
501   PetscErrorCode ierr;
502   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
503   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
504 
505   PetscFunctionBegin;
506   for (i=0; i<m; i++) {
507     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
508     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
509     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
510       row = idxm[i] - bsrstart;
511       for (j=0; j<n; j++) {
512         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
513         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
514         if (idxn[j] >= bscstart && idxn[j] < bscend){
515           col = idxn[j] - bscstart;
516           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
517         } else {
518           if (!baij->colmap) {
519             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
520           }
521 #if defined (PETSC_USE_CTABLE)
522           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
523           data --;
524 #else
525           data = baij->colmap[idxn[j]/bs]-1;
526 #endif
527           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
528           else {
529             col  = data + idxn[j]%bs;
530             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
531           }
532         }
533       }
534     } else {
535       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
536     }
537   }
538  PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatNorm_MPISBAIJ"
543 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
544 {
545   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
546   PetscErrorCode ierr;
547   PetscReal      sum[2],*lnorm2;
548 
549   PetscFunctionBegin;
550   if (baij->size == 1) {
551     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
552   } else {
553     if (type == NORM_FROBENIUS) {
554       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
555       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
556       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
557       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
558       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
559       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
560       *norm = sqrt(sum[0] + 2*sum[1]);
561       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
562     } else {
563       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
564     }
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
571 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
572 {
573   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
574   PetscErrorCode ierr;
575   PetscInt       nstash,reallocs;
576   InsertMode     addv;
577 
578   PetscFunctionBegin;
579   if (baij->donotstash) {
580     PetscFunctionReturn(0);
581   }
582 
583   /* make sure all processors are either in INSERTMODE or ADDMODE */
584   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
585   if (addv == (ADD_VALUES|INSERT_VALUES)) {
586     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
587   }
588   mat->insertmode = addv; /* in case this processor had no cache */
589 
590   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
591   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
592   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
593   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
594   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
595   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
601 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
602 {
603   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
604   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
605   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
606   PetscErrorCode ierr;
607   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
608   PetscInt       *row,*col,other_disassembled;
609   PetscMPIInt    n;
610   PetscTruth     r1,r2,r3;
611   MatScalar      *val;
612   InsertMode     addv = mat->insertmode;
613 
614   PetscFunctionBegin;
615 
616   if (!baij->donotstash) {
617     while (1) {
618       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
619       if (!flg) break;
620 
621       for (i=0; i<n;) {
622         /* Now identify the consecutive vals belonging to the same row */
623         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
624         if (j < n) ncols = j-i;
625         else       ncols = n-i;
626         /* Now assemble all these values with a single function call */
627         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
628         i = j;
629       }
630     }
631     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
632     /* Now process the block-stash. Since the values are stashed column-oriented,
633        set the roworiented flag to column oriented, and after MatSetValues()
634        restore the original flags */
635     r1 = baij->roworiented;
636     r2 = a->roworiented;
637     r3 = b->roworiented;
638     baij->roworiented = PETSC_FALSE;
639     a->roworiented    = PETSC_FALSE;
640     b->roworiented    = PETSC_FALSE;
641     while (1) {
642       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
643       if (!flg) break;
644 
645       for (i=0; i<n;) {
646         /* Now identify the consecutive vals belonging to the same row */
647         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
648         if (j < n) ncols = j-i;
649         else       ncols = n-i;
650         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
651         i = j;
652       }
653     }
654     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
655     baij->roworiented = r1;
656     a->roworiented    = r2;
657     b->roworiented    = r3;
658   }
659 
660   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
661   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
662 
663   /* determine if any processor has disassembled, if so we must
664      also disassemble ourselfs, in order that we may reassemble. */
665   /*
666      if nonzero structure of submatrix B cannot change then we know that
667      no processor disassembled thus we can skip this stuff
668   */
669   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
670     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
671     if (mat->was_assembled && !other_disassembled) {
672       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
673     }
674   }
675 
676   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
677     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
678   }
679   b->compressedrow.use = PETSC_TRUE;
680   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
681   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
682 
683   if (baij->rowvalues) {
684     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
685     baij->rowvalues = 0;
686   }
687 
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
693 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
694 {
695   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
696   PetscErrorCode    ierr;
697   PetscInt          bs = mat->bs;
698   PetscMPIInt       size = baij->size,rank = baij->rank;
699   PetscTruth        iascii,isdraw;
700   PetscViewer       sviewer;
701   PetscViewerFormat format;
702 
703   PetscFunctionBegin;
704   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
705   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
706   if (iascii) {
707     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
708     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
709       MatInfo info;
710       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
711       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
712       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
713               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
714               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
715       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
716       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
717       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
718       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
719       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
720       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
721       PetscFunctionReturn(0);
722     } else if (format == PETSC_VIEWER_ASCII_INFO) {
723       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
724       PetscFunctionReturn(0);
725     }
726   }
727 
728   if (isdraw) {
729     PetscDraw       draw;
730     PetscTruth isnull;
731     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
732     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
733   }
734 
735   if (size == 1) {
736     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
737     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
738   } else {
739     /* assemble the entire matrix onto first processor. */
740     Mat         A;
741     Mat_SeqSBAIJ *Aloc;
742     Mat_SeqBAIJ *Bloc;
743     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
744     MatScalar   *a;
745 
746     /* Should this be the same type as mat? */
747     if (!rank) {
748       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
749     } else {
750       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
751     }
752     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
753     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
754     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
755 
756     /* copy over the A part */
757     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
758     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
759     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
760 
761     for (i=0; i<mbs; i++) {
762       rvals[0] = bs*(baij->rstart + i);
763       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
764       for (j=ai[i]; j<ai[i+1]; j++) {
765         col = (baij->cstart+aj[j])*bs;
766         for (k=0; k<bs; k++) {
767           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
768           col++; a += bs;
769         }
770       }
771     }
772     /* copy over the B part */
773     Bloc = (Mat_SeqBAIJ*)baij->B->data;
774     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
775     for (i=0; i<mbs; i++) {
776       rvals[0] = bs*(baij->rstart + i);
777       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
778       for (j=ai[i]; j<ai[i+1]; j++) {
779         col = baij->garray[aj[j]]*bs;
780         for (k=0; k<bs; k++) {
781           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
782           col++; a += bs;
783         }
784       }
785     }
786     ierr = PetscFree(rvals);CHKERRQ(ierr);
787     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
788     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
789     /*
790        Everyone has to call to draw the matrix since the graphics waits are
791        synchronized across all processors that share the PetscDraw object
792     */
793     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
794     if (!rank) {
795       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
796       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
797     }
798     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
799     ierr = MatDestroy(A);CHKERRQ(ierr);
800   }
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatView_MPISBAIJ"
806 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
807 {
808   PetscErrorCode ierr;
809   PetscTruth     iascii,isdraw,issocket,isbinary;
810 
811   PetscFunctionBegin;
812   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
814   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
815   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
816   if (iascii || isdraw || issocket || isbinary) {
817     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
818   } else {
819     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatDestroy_MPISBAIJ"
826 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
827 {
828   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
829   PetscErrorCode ierr;
830 
831   PetscFunctionBegin;
832 #if defined(PETSC_USE_LOG)
833   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
834 #endif
835   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
836   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
837   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
838   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
839   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
840 #if defined (PETSC_USE_CTABLE)
841   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
842 #else
843   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
844 #endif
845   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
846   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
847   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
848   if (baij->slvec0) {
849     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
850     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
851   }
852   if (baij->slvec1) {
853     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
854     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
855     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
856   }
857   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
858   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
859   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
860   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
861 #if defined(PETSC_USE_MAT_SINGLE)
862   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
863 #endif
864   ierr = PetscFree(baij);CHKERRQ(ierr);
865 
866   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
867   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
868   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
869   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatMult_MPISBAIJ"
875 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
876 {
877   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
878   PetscErrorCode ierr;
879   PetscInt       nt,mbs=a->mbs,bs=A->bs;
880   PetscScalar    *x,*from,zero=0.0;
881 
882   PetscFunctionBegin;
883   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
884   if (nt != A->n) {
885     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
886   }
887   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
888   if (nt != A->m) {
889     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
890   }
891 
892   /* diagonal part */
893   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
894   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
895 
896   /* subdiagonal part */
897   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
898 
899   /* copy x into the vec slvec0 */
900   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
901   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
902   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
903   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
904 
905   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
906   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
908 
909   /* supperdiagonal part */
910   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
911 
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
917 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
918 {
919   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
920   PetscErrorCode ierr;
921   PetscInt       nt;
922 
923   PetscFunctionBegin;
924   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
925   if (nt != A->n) {
926     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
927   }
928   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
929   if (nt != A->m) {
930     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
931   }
932 
933   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
934   /* do diagonal part */
935   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
936   /* do supperdiagonal part */
937   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
938   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
939   /* do subdiagonal part */
940   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
941   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
942   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
943 
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
949 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
950 {
951   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
952   PetscErrorCode ierr;
953   PetscInt       mbs=a->mbs,bs=A->bs;
954   PetscScalar    *x,*from,zero=0.0;
955 
956   PetscFunctionBegin;
957   /*
958   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
959   PetscSynchronizedFlush(A->comm);
960   */
961   /* diagonal part */
962   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
963   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
964 
965   /* subdiagonal part */
966   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
967 
968   /* copy x into the vec slvec0 */
969   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
970   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
971   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
972   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
973 
974   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
975   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
976   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
977 
978   /* supperdiagonal part */
979   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
980 
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
986 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
987 {
988   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
993   /* do diagonal part */
994   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
995   /* do supperdiagonal part */
996   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
997   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
998 
999   /* do subdiagonal part */
1000   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1001   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1003 
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1009 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1010 {
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1020 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1021 {
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 /*
1030   This only works correctly for square matrices where the subblock A->A is the
1031    diagonal block
1032 */
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1035 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1036 {
1037   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1042   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatScale_MPISBAIJ"
1048 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1049 {
1050   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1055   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1061 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1062 {
1063   PetscFunctionBegin;
1064   SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) {
1076     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1077   }
1078   baij->getrowactive = PETSC_FALSE;
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1084 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1085 {
1086   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1087   PetscErrorCode ierr;
1088 
1089   PetscFunctionBegin;
1090   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1091   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1097 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1098 {
1099   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1100   Mat            A = a->A,B = a->B;
1101   PetscErrorCode ierr;
1102   PetscReal      isend[5],irecv[5];
1103 
1104   PetscFunctionBegin;
1105   info->block_size     = (PetscReal)matin->bs;
1106   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1107   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1108   isend[3] = info->memory;  isend[4] = info->mallocs;
1109   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1110   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1111   isend[3] += info->memory;  isend[4] += info->mallocs;
1112   if (flag == MAT_LOCAL) {
1113     info->nz_used      = isend[0];
1114     info->nz_allocated = isend[1];
1115     info->nz_unneeded  = isend[2];
1116     info->memory       = isend[3];
1117     info->mallocs      = isend[4];
1118   } else if (flag == MAT_GLOBAL_MAX) {
1119     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1120     info->nz_used      = irecv[0];
1121     info->nz_allocated = irecv[1];
1122     info->nz_unneeded  = irecv[2];
1123     info->memory       = irecv[3];
1124     info->mallocs      = irecv[4];
1125   } else if (flag == MAT_GLOBAL_SUM) {
1126     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1127     info->nz_used      = irecv[0];
1128     info->nz_allocated = irecv[1];
1129     info->nz_unneeded  = irecv[2];
1130     info->memory       = irecv[3];
1131     info->mallocs      = irecv[4];
1132   } else {
1133     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1134   }
1135   info->rows_global       = (PetscReal)A->M;
1136   info->columns_global    = (PetscReal)A->N;
1137   info->rows_local        = (PetscReal)A->m;
1138   info->columns_local     = (PetscReal)A->N;
1139   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1140   info->fill_ratio_needed = 0;
1141   info->factor_mallocs    = 0;
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1147 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1148 {
1149   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   switch (op) {
1154   case MAT_NO_NEW_NONZERO_LOCATIONS:
1155   case MAT_YES_NEW_NONZERO_LOCATIONS:
1156   case MAT_COLUMNS_UNSORTED:
1157   case MAT_COLUMNS_SORTED:
1158   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1159   case MAT_KEEP_ZEROED_ROWS:
1160   case MAT_NEW_NONZERO_LOCATION_ERR:
1161     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1162     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1163     break;
1164   case MAT_ROW_ORIENTED:
1165     a->roworiented = PETSC_TRUE;
1166     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1167     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1168     break;
1169   case MAT_ROWS_SORTED:
1170   case MAT_ROWS_UNSORTED:
1171   case MAT_YES_NEW_DIAGONALS:
1172     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1173     break;
1174   case MAT_COLUMN_ORIENTED:
1175     a->roworiented = PETSC_FALSE;
1176     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1177     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1178     break;
1179   case MAT_IGNORE_OFF_PROC_ENTRIES:
1180     a->donotstash = PETSC_TRUE;
1181     break;
1182   case MAT_NO_NEW_DIAGONALS:
1183     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1184   case MAT_USE_HASH_TABLE:
1185     a->ht_flag = PETSC_TRUE;
1186     break;
1187   case MAT_NOT_SYMMETRIC:
1188   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1189   case MAT_HERMITIAN:
1190     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1191   case MAT_SYMMETRIC:
1192   case MAT_STRUCTURALLY_SYMMETRIC:
1193   case MAT_NOT_HERMITIAN:
1194   case MAT_SYMMETRY_ETERNAL:
1195   case MAT_NOT_SYMMETRY_ETERNAL:
1196     break;
1197   default:
1198     SETERRQ(PETSC_ERR_SUP,"unknown option");
1199   }
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1205 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1206 {
1207   PetscErrorCode ierr;
1208   PetscFunctionBegin;
1209   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1215 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1216 {
1217   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1218   Mat            a=baij->A, b=baij->B;
1219   PetscErrorCode ierr;
1220   PetscInt       nv,m,n;
1221   PetscTruth     flg;
1222 
1223   PetscFunctionBegin;
1224   if (ll != rr){
1225     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1226     if (!flg)
1227       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1228   }
1229   if (!ll) PetscFunctionReturn(0);
1230 
1231   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1232   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1233 
1234   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1235   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1236 
1237   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1238 
1239   /* left diagonalscale the off-diagonal part */
1240   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1241 
1242   /* scale the diagonal part */
1243   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1244 
1245   /* right diagonalscale the off-diagonal part */
1246   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1247   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1253 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1254 {
1255   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1256   MPI_Comm          comm = A->comm;
1257   static PetscTruth called = PETSC_FALSE;
1258   PetscErrorCode    ierr;
1259 
1260   PetscFunctionBegin;
1261   if (!a->rank) {
1262     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1263   }
1264   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1265   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1266   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1272 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1273 {
1274   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatEqual_MPISBAIJ"
1286 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1287 {
1288   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1289   Mat            a,b,c,d;
1290   PetscTruth     flg;
1291   PetscErrorCode ierr;
1292 
1293   PetscFunctionBegin;
1294   a = matA->A; b = matA->B;
1295   c = matB->A; d = matB->B;
1296 
1297   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1298   if (flg) {
1299     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1300   }
1301   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1307 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1308 {
1309   PetscErrorCode ierr;
1310 
1311   PetscFunctionBegin;
1312   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1318 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1319 {
1320   PetscErrorCode ierr;
1321   PetscInt       i;
1322   PetscTruth     flg;
1323 
1324   PetscFunctionBegin;
1325   for (i=0; i<n; i++) {
1326     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1327     if (!flg) {
1328       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1329     }
1330   }
1331   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
1334 
1335 
1336 /* -------------------------------------------------------------------*/
1337 static struct _MatOps MatOps_Values = {
1338        MatSetValues_MPISBAIJ,
1339        MatGetRow_MPISBAIJ,
1340        MatRestoreRow_MPISBAIJ,
1341        MatMult_MPISBAIJ,
1342 /* 4*/ MatMultAdd_MPISBAIJ,
1343        MatMultTranspose_MPISBAIJ,
1344        MatMultTransposeAdd_MPISBAIJ,
1345        0,
1346        0,
1347        0,
1348 /*10*/ 0,
1349        0,
1350        0,
1351        MatRelax_MPISBAIJ,
1352        MatTranspose_MPISBAIJ,
1353 /*15*/ MatGetInfo_MPISBAIJ,
1354        MatEqual_MPISBAIJ,
1355        MatGetDiagonal_MPISBAIJ,
1356        MatDiagonalScale_MPISBAIJ,
1357        MatNorm_MPISBAIJ,
1358 /*20*/ MatAssemblyBegin_MPISBAIJ,
1359        MatAssemblyEnd_MPISBAIJ,
1360        0,
1361        MatSetOption_MPISBAIJ,
1362        MatZeroEntries_MPISBAIJ,
1363 /*25*/ 0,
1364        0,
1365        0,
1366        0,
1367        0,
1368 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1369        0,
1370        0,
1371        0,
1372        0,
1373 /*35*/ MatDuplicate_MPISBAIJ,
1374        0,
1375        0,
1376        0,
1377        0,
1378 /*40*/ 0,
1379        MatGetSubMatrices_MPISBAIJ,
1380        MatIncreaseOverlap_MPISBAIJ,
1381        MatGetValues_MPISBAIJ,
1382        0,
1383 /*45*/ MatPrintHelp_MPISBAIJ,
1384        MatScale_MPISBAIJ,
1385        0,
1386        0,
1387        0,
1388 /*50*/ 0,
1389        0,
1390        0,
1391        0,
1392        0,
1393 /*55*/ 0,
1394        0,
1395        MatSetUnfactored_MPISBAIJ,
1396        0,
1397        MatSetValuesBlocked_MPISBAIJ,
1398 /*60*/ 0,
1399        0,
1400        0,
1401        MatGetPetscMaps_Petsc,
1402        0,
1403 /*65*/ 0,
1404        0,
1405        0,
1406        0,
1407        0,
1408 /*70*/ MatGetRowMax_MPISBAIJ,
1409        0,
1410        0,
1411        0,
1412        0,
1413 /*75*/ 0,
1414        0,
1415        0,
1416        0,
1417        0,
1418 /*80*/ 0,
1419        0,
1420        0,
1421        0,
1422        MatLoad_MPISBAIJ,
1423 /*85*/ 0,
1424        0,
1425        0,
1426        0,
1427        0,
1428 /*90*/ 0,
1429        0,
1430        0,
1431        0,
1432        0,
1433 /*95*/ 0,
1434        0,
1435        0,
1436        0};
1437 
1438 
1439 EXTERN_C_BEGIN
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1442 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1443 {
1444   PetscFunctionBegin;
1445   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1446   *iscopy = PETSC_FALSE;
1447   PetscFunctionReturn(0);
1448 }
1449 EXTERN_C_END
1450 
1451 EXTERN_C_BEGIN
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1454 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1455 {
1456   Mat_MPISBAIJ   *b;
1457   PetscErrorCode ierr;
1458   PetscInt       i,mbs,Mbs;
1459 
1460   PetscFunctionBegin;
1461   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1462 
1463   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1464   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1465   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1466   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1467   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1468   if (d_nnz) {
1469     for (i=0; i<B->m/bs; i++) {
1470       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1471     }
1472   }
1473   if (o_nnz) {
1474     for (i=0; i<B->m/bs; i++) {
1475       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1476     }
1477   }
1478   B->preallocated = PETSC_TRUE;
1479   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1480   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1481   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1482   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1483 
1484   b   = (Mat_MPISBAIJ*)B->data;
1485   mbs = B->m/bs;
1486   Mbs = B->M/bs;
1487   if (mbs*bs != B->m) {
1488     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1489   }
1490 
1491   B->bs  = bs;
1492   b->bs2 = bs*bs;
1493   b->mbs = mbs;
1494   b->nbs = mbs;
1495   b->Mbs = Mbs;
1496   b->Nbs = Mbs;
1497 
1498   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1499   b->rowners[0]    = 0;
1500   for (i=2; i<=b->size; i++) {
1501     b->rowners[i] += b->rowners[i-1];
1502   }
1503   b->rstart    = b->rowners[b->rank];
1504   b->rend      = b->rowners[b->rank+1];
1505   b->cstart    = b->rstart;
1506   b->cend      = b->rend;
1507   for (i=0; i<=b->size; i++) {
1508     b->rowners_bs[i] = b->rowners[i]*bs;
1509   }
1510   b->rstart_bs = b-> rstart*bs;
1511   b->rend_bs   = b->rend*bs;
1512 
1513   b->cstart_bs = b->cstart*bs;
1514   b->cend_bs   = b->cend*bs;
1515 
1516   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1517   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1518   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1519   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1520 
1521   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1522   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1523   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1524   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1525 
1526   /* build cache for off array entries formed */
1527   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1528 
1529   PetscFunctionReturn(0);
1530 }
1531 EXTERN_C_END
1532 
1533 /*MC
1534    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1535    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1536 
1537    Options Database Keys:
1538 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1539 
1540   Level: beginner
1541 
1542 .seealso: MatCreateMPISBAIJ
1543 M*/
1544 
1545 EXTERN_C_BEGIN
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatCreate_MPISBAIJ"
1548 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1549 {
1550   Mat_MPISBAIJ   *b;
1551   PetscErrorCode ierr;
1552   PetscTruth     flg;
1553 
1554   PetscFunctionBegin;
1555 
1556   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1557   B->data = (void*)b;
1558   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1559 
1560   B->ops->destroy    = MatDestroy_MPISBAIJ;
1561   B->ops->view       = MatView_MPISBAIJ;
1562   B->mapping    = 0;
1563   B->factor     = 0;
1564   B->assembled  = PETSC_FALSE;
1565 
1566   B->insertmode = NOT_SET_VALUES;
1567   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1568   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1569 
1570   /* build local table of row and column ownerships */
1571   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1572   b->cowners    = b->rowners + b->size + 2;
1573   b->rowners_bs = b->cowners + b->size + 2;
1574   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1575 
1576   /* build cache for off array entries formed */
1577   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1578   b->donotstash  = PETSC_FALSE;
1579   b->colmap      = PETSC_NULL;
1580   b->garray      = PETSC_NULL;
1581   b->roworiented = PETSC_TRUE;
1582 
1583 #if defined(PETSC_USE_MAT_SINGLE)
1584   /* stuff for MatSetValues_XXX in single precision */
1585   b->setvalueslen     = 0;
1586   b->setvaluescopy    = PETSC_NULL;
1587 #endif
1588 
1589   /* stuff used in block assembly */
1590   b->barray       = 0;
1591 
1592   /* stuff used for matrix vector multiply */
1593   b->lvec         = 0;
1594   b->Mvctx        = 0;
1595   b->slvec0       = 0;
1596   b->slvec0b      = 0;
1597   b->slvec1       = 0;
1598   b->slvec1a      = 0;
1599   b->slvec1b      = 0;
1600   b->sMvctx       = 0;
1601 
1602   /* stuff for MatGetRow() */
1603   b->rowindices   = 0;
1604   b->rowvalues    = 0;
1605   b->getrowactive = PETSC_FALSE;
1606 
1607   /* hash table stuff */
1608   b->ht           = 0;
1609   b->hd           = 0;
1610   b->ht_size      = 0;
1611   b->ht_flag      = PETSC_FALSE;
1612   b->ht_fact      = 0;
1613   b->ht_total_ct  = 0;
1614   b->ht_insert_ct = 0;
1615 
1616   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1617   if (flg) {
1618     PetscReal fact = 1.39;
1619     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1620     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1621     if (fact <= 1.0) fact = 1.39;
1622     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1623     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1624   }
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1626                                      "MatStoreValues_MPISBAIJ",
1627                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1629                                      "MatRetrieveValues_MPISBAIJ",
1630                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1632                                      "MatGetDiagonalBlock_MPISBAIJ",
1633                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1635                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1636                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1637   B->symmetric                  = PETSC_TRUE;
1638   B->structurally_symmetric     = PETSC_TRUE;
1639   B->symmetric_set              = PETSC_TRUE;
1640   B->structurally_symmetric_set = PETSC_TRUE;
1641   PetscFunctionReturn(0);
1642 }
1643 EXTERN_C_END
1644 
1645 /*MC
1646    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1647 
1648    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1649    and MATMPISBAIJ otherwise.
1650 
1651    Options Database Keys:
1652 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1653 
1654   Level: beginner
1655 
1656 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1657 M*/
1658 
1659 EXTERN_C_BEGIN
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatCreate_SBAIJ"
1662 PetscErrorCode MatCreate_SBAIJ(Mat A)
1663 {
1664   PetscErrorCode ierr;
1665   PetscMPIInt    size;
1666 
1667   PetscFunctionBegin;
1668   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1669   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1670   if (size == 1) {
1671     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1672   } else {
1673     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1674   }
1675   PetscFunctionReturn(0);
1676 }
1677 EXTERN_C_END
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1681 /*@C
1682    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1683    the user should preallocate the matrix storage by setting the parameters
1684    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1685    performance can be increased by more than a factor of 50.
1686 
1687    Collective on Mat
1688 
1689    Input Parameters:
1690 +  A - the matrix
1691 .  bs   - size of blockk
1692 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1693            submatrix  (same for all local rows)
1694 .  d_nnz - array containing the number of block nonzeros in the various block rows
1695            in the upper triangular and diagonal part of the in diagonal portion of the local
1696            (possibly different for each block row) or PETSC_NULL.  You must leave room
1697            for the diagonal entry even if it is zero.
1698 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1699            submatrix (same for all local rows).
1700 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1701            off-diagonal portion of the local submatrix (possibly different for
1702            each block row) or PETSC_NULL.
1703 
1704 
1705    Options Database Keys:
1706 .   -mat_no_unroll - uses code that does not unroll the loops in the
1707                      block calculations (much slower)
1708 .   -mat_block_size - size of the blocks to use
1709 
1710    Notes:
1711 
1712    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1713    than it must be used on all processors that share the object for that argument.
1714 
1715    If the *_nnz parameter is given then the *_nz parameter is ignored
1716 
1717    Storage Information:
1718    For a square global matrix we define each processor's diagonal portion
1719    to be its local rows and the corresponding columns (a square submatrix);
1720    each processor's off-diagonal portion encompasses the remainder of the
1721    local matrix (a rectangular submatrix).
1722 
1723    The user can specify preallocated storage for the diagonal part of
1724    the local submatrix with either d_nz or d_nnz (not both).  Set
1725    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1726    memory allocation.  Likewise, specify preallocated storage for the
1727    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1728 
1729    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1730    the figure below we depict these three local rows and all columns (0-11).
1731 
1732 .vb
1733            0 1 2 3 4 5 6 7 8 9 10 11
1734           -------------------
1735    row 3  |  o o o d d d o o o o o o
1736    row 4  |  o o o d d d o o o o o o
1737    row 5  |  o o o d d d o o o o o o
1738           -------------------
1739 .ve
1740 
1741    Thus, any entries in the d locations are stored in the d (diagonal)
1742    submatrix, and any entries in the o locations are stored in the
1743    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1744    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1745 
1746    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1747    plus the diagonal part of the d matrix,
1748    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1749    In general, for PDE problems in which most nonzeros are near the diagonal,
1750    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1751    or you will get TERRIBLE performance; see the users' manual chapter on
1752    matrices.
1753 
1754    Level: intermediate
1755 
1756 .keywords: matrix, block, aij, compressed row, sparse, parallel
1757 
1758 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1759 @*/
1760 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1761 {
1762   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1763 
1764   PetscFunctionBegin;
1765   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1766   if (f) {
1767     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1768   }
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatCreateMPISBAIJ"
1774 /*@C
1775    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1776    (block compressed row).  For good matrix assembly performance
1777    the user should preallocate the matrix storage by setting the parameters
1778    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1779    performance can be increased by more than a factor of 50.
1780 
1781    Collective on MPI_Comm
1782 
1783    Input Parameters:
1784 +  comm - MPI communicator
1785 .  bs   - size of blockk
1786 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1787            This value should be the same as the local size used in creating the
1788            y vector for the matrix-vector product y = Ax.
1789 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1790            This value should be the same as the local size used in creating the
1791            x vector for the matrix-vector product y = Ax.
1792 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1793 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1794 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1795            submatrix  (same for all local rows)
1796 .  d_nnz - array containing the number of block nonzeros in the various block rows
1797            in the upper triangular portion of the in diagonal portion of the local
1798            (possibly different for each block block row) or PETSC_NULL.
1799            You must leave room for the diagonal entry even if it is zero.
1800 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1801            submatrix (same for all local rows).
1802 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1803            off-diagonal portion of the local submatrix (possibly different for
1804            each block row) or PETSC_NULL.
1805 
1806    Output Parameter:
1807 .  A - the matrix
1808 
1809    Options Database Keys:
1810 .   -mat_no_unroll - uses code that does not unroll the loops in the
1811                      block calculations (much slower)
1812 .   -mat_block_size - size of the blocks to use
1813 .   -mat_mpi - use the parallel matrix data structures even on one processor
1814                (defaults to using SeqBAIJ format on one processor)
1815 
1816    Notes:
1817    The number of rows and columns must be divisible by blocksize.
1818 
1819    The user MUST specify either the local or global matrix dimensions
1820    (possibly both).
1821 
1822    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1823    than it must be used on all processors that share the object for that argument.
1824 
1825    If the *_nnz parameter is given then the *_nz parameter is ignored
1826 
1827    Storage Information:
1828    For a square global matrix we define each processor's diagonal portion
1829    to be its local rows and the corresponding columns (a square submatrix);
1830    each processor's off-diagonal portion encompasses the remainder of the
1831    local matrix (a rectangular submatrix).
1832 
1833    The user can specify preallocated storage for the diagonal part of
1834    the local submatrix with either d_nz or d_nnz (not both).  Set
1835    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1836    memory allocation.  Likewise, specify preallocated storage for the
1837    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1838 
1839    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1840    the figure below we depict these three local rows and all columns (0-11).
1841 
1842 .vb
1843            0 1 2 3 4 5 6 7 8 9 10 11
1844           -------------------
1845    row 3  |  o o o d d d o o o o o o
1846    row 4  |  o o o d d d o o o o o o
1847    row 5  |  o o o d d d o o o o o o
1848           -------------------
1849 .ve
1850 
1851    Thus, any entries in the d locations are stored in the d (diagonal)
1852    submatrix, and any entries in the o locations are stored in the
1853    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1854    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1855 
1856    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1857    plus the diagonal part of the d matrix,
1858    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1859    In general, for PDE problems in which most nonzeros are near the diagonal,
1860    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1861    or you will get TERRIBLE performance; see the users' manual chapter on
1862    matrices.
1863 
1864    Level: intermediate
1865 
1866 .keywords: matrix, block, aij, compressed row, sparse, parallel
1867 
1868 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1869 @*/
1870 
1871 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1872 {
1873   PetscErrorCode ierr;
1874   PetscMPIInt    size;
1875 
1876   PetscFunctionBegin;
1877   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1878   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1879   if (size > 1) {
1880     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1881     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1882   } else {
1883     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1884     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1885   }
1886   PetscFunctionReturn(0);
1887 }
1888 
1889 
1890 #undef __FUNCT__
1891 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1892 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1893 {
1894   Mat            mat;
1895   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1896   PetscErrorCode ierr;
1897   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1898   PetscScalar    *array;
1899 
1900   PetscFunctionBegin;
1901   *newmat       = 0;
1902   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1903   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1904   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1905 
1906   mat->factor       = matin->factor;
1907   mat->preallocated = PETSC_TRUE;
1908   mat->assembled    = PETSC_TRUE;
1909   mat->insertmode   = NOT_SET_VALUES;
1910 
1911   a = (Mat_MPISBAIJ*)mat->data;
1912   mat->bs  = matin->bs;
1913   a->bs2   = oldmat->bs2;
1914   a->mbs   = oldmat->mbs;
1915   a->nbs   = oldmat->nbs;
1916   a->Mbs   = oldmat->Mbs;
1917   a->Nbs   = oldmat->Nbs;
1918 
1919   a->rstart       = oldmat->rstart;
1920   a->rend         = oldmat->rend;
1921   a->cstart       = oldmat->cstart;
1922   a->cend         = oldmat->cend;
1923   a->size         = oldmat->size;
1924   a->rank         = oldmat->rank;
1925   a->donotstash   = oldmat->donotstash;
1926   a->roworiented  = oldmat->roworiented;
1927   a->rowindices   = 0;
1928   a->rowvalues    = 0;
1929   a->getrowactive = PETSC_FALSE;
1930   a->barray       = 0;
1931   a->rstart_bs    = oldmat->rstart_bs;
1932   a->rend_bs      = oldmat->rend_bs;
1933   a->cstart_bs    = oldmat->cstart_bs;
1934   a->cend_bs      = oldmat->cend_bs;
1935 
1936   /* hash table stuff */
1937   a->ht           = 0;
1938   a->hd           = 0;
1939   a->ht_size      = 0;
1940   a->ht_flag      = oldmat->ht_flag;
1941   a->ht_fact      = oldmat->ht_fact;
1942   a->ht_total_ct  = 0;
1943   a->ht_insert_ct = 0;
1944 
1945   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1946   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1947   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1948   if (oldmat->colmap) {
1949 #if defined (PETSC_USE_CTABLE)
1950     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1951 #else
1952     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1953     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1954     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1955 #endif
1956   } else a->colmap = 0;
1957 
1958   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1959     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1960     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1961     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1962   } else a->garray = 0;
1963 
1964   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1965   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1966   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1967   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1968 
1969   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1970   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1971   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1972   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1973 
1974   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1975   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1976   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1977   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1978   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1979   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1980   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1981   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1982   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1983   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1984   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
1985   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
1986   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
1987 
1988   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
1989   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1990   a->sMvctx = oldmat->sMvctx;
1991   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
1992 
1993   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1994   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1995   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1996   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1997   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
1998   *newmat = mat;
1999   PetscFunctionReturn(0);
2000 }
2001 
2002 #include "petscsys.h"
2003 
2004 #undef __FUNCT__
2005 #define __FUNCT__ "MatLoad_MPISBAIJ"
2006 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2007 {
2008   Mat            A;
2009   PetscErrorCode ierr;
2010   PetscInt       i,nz,j,rstart,rend;
2011   PetscScalar    *vals,*buf;
2012   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2013   MPI_Status     status;
2014   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2015   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2016   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2017   PetscInt       bs=1,Mbs,mbs,extra_rows;
2018   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2019   PetscInt       dcount,kmax,k,nzcount,tmp;
2020   int            fd;
2021 
2022   PetscFunctionBegin;
2023   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2024 
2025   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2026   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2027   if (!rank) {
2028     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2029     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2030     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2031     if (header[3] < 0) {
2032       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2033     }
2034   }
2035 
2036   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2037   M = header[1]; N = header[2];
2038 
2039   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2040 
2041   /*
2042      This code adds extra rows to make sure the number of rows is
2043      divisible by the blocksize
2044   */
2045   Mbs        = M/bs;
2046   extra_rows = bs - M + bs*(Mbs);
2047   if (extra_rows == bs) extra_rows = 0;
2048   else                  Mbs++;
2049   if (extra_rows &&!rank) {
2050     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2051   }
2052 
2053   /* determine ownership of all rows */
2054   mbs        = Mbs/size + ((Mbs % size) > rank);
2055   m          = mbs*bs;
2056   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2057   browners   = rowners + size + 1;
2058   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2059   rowners[0] = 0;
2060   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2061   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2062   rstart = rowners[rank];
2063   rend   = rowners[rank+1];
2064 
2065   /* distribute row lengths to all processors */
2066   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2067   if (!rank) {
2068     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2069     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2070     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2071     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2072     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2073     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2074     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2075   } else {
2076     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2077   }
2078 
2079   if (!rank) {   /* procs[0] */
2080     /* calculate the number of nonzeros on each processor */
2081     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2082     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2083     for (i=0; i<size; i++) {
2084       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2085         procsnz[i] += rowlengths[j];
2086       }
2087     }
2088     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2089 
2090     /* determine max buffer needed and allocate it */
2091     maxnz = 0;
2092     for (i=0; i<size; i++) {
2093       maxnz = PetscMax(maxnz,procsnz[i]);
2094     }
2095     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2096 
2097     /* read in my part of the matrix column indices  */
2098     nz     = procsnz[0];
2099     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2100     mycols = ibuf;
2101     if (size == 1)  nz -= extra_rows;
2102     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2103     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2104 
2105     /* read in every ones (except the last) and ship off */
2106     for (i=1; i<size-1; i++) {
2107       nz   = procsnz[i];
2108       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2109       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2110     }
2111     /* read in the stuff for the last proc */
2112     if (size != 1) {
2113       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2114       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2115       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2116       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2117     }
2118     ierr = PetscFree(cols);CHKERRQ(ierr);
2119   } else {  /* procs[i], i>0 */
2120     /* determine buffer space needed for message */
2121     nz = 0;
2122     for (i=0; i<m; i++) {
2123       nz += locrowlens[i];
2124     }
2125     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2126     mycols = ibuf;
2127     /* receive message of column indices*/
2128     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2129     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2130     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2131   }
2132 
2133   /* loop over local rows, determining number of off diagonal entries */
2134   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2135   odlens   = dlens + (rend-rstart);
2136   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2137   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2138   masked1  = mask    + Mbs;
2139   masked2  = masked1 + Mbs;
2140   rowcount = 0; nzcount = 0;
2141   for (i=0; i<mbs; i++) {
2142     dcount  = 0;
2143     odcount = 0;
2144     for (j=0; j<bs; j++) {
2145       kmax = locrowlens[rowcount];
2146       for (k=0; k<kmax; k++) {
2147         tmp = mycols[nzcount++]/bs; /* block col. index */
2148         if (!mask[tmp]) {
2149           mask[tmp] = 1;
2150           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2151           else masked1[dcount++] = tmp; /* entry in diag portion */
2152         }
2153       }
2154       rowcount++;
2155     }
2156 
2157     dlens[i]  = dcount;  /* d_nzz[i] */
2158     odlens[i] = odcount; /* o_nzz[i] */
2159 
2160     /* zero out the mask elements we set */
2161     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2162     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2163   }
2164 
2165   /* create our matrix */
2166   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2167   ierr = MatSetType(A,type);CHKERRQ(ierr);
2168   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2169   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2170 
2171   if (!rank) {
2172     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2173     /* read in my part of the matrix numerical values  */
2174     nz = procsnz[0];
2175     vals = buf;
2176     mycols = ibuf;
2177     if (size == 1)  nz -= extra_rows;
2178     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2179     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2180 
2181     /* insert into matrix */
2182     jj      = rstart*bs;
2183     for (i=0; i<m; i++) {
2184       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2185       mycols += locrowlens[i];
2186       vals   += locrowlens[i];
2187       jj++;
2188     }
2189 
2190     /* read in other processors (except the last one) and ship out */
2191     for (i=1; i<size-1; i++) {
2192       nz   = procsnz[i];
2193       vals = buf;
2194       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2195       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2196     }
2197     /* the last proc */
2198     if (size != 1){
2199       nz   = procsnz[i] - extra_rows;
2200       vals = buf;
2201       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2202       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2203       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2204     }
2205     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2206 
2207   } else {
2208     /* receive numeric values */
2209     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2210 
2211     /* receive message of values*/
2212     vals   = buf;
2213     mycols = ibuf;
2214     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2215     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2216     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2217 
2218     /* insert into matrix */
2219     jj      = rstart*bs;
2220     for (i=0; i<m; i++) {
2221       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2222       mycols += locrowlens[i];
2223       vals   += locrowlens[i];
2224       jj++;
2225     }
2226   }
2227 
2228   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2229   ierr = PetscFree(buf);CHKERRQ(ierr);
2230   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2231   ierr = PetscFree(rowners);CHKERRQ(ierr);
2232   ierr = PetscFree(dlens);CHKERRQ(ierr);
2233   ierr = PetscFree(mask);CHKERRQ(ierr);
2234   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2235   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2236   *newmat = A;
2237   PetscFunctionReturn(0);
2238 }
2239 
2240 #undef __FUNCT__
2241 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2242 /*XXXXX@
2243    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2244 
2245    Input Parameters:
2246 .  mat  - the matrix
2247 .  fact - factor
2248 
2249    Collective on Mat
2250 
2251    Level: advanced
2252 
2253   Notes:
2254    This can also be set by the command line option: -mat_use_hash_table fact
2255 
2256 .keywords: matrix, hashtable, factor, HT
2257 
2258 .seealso: MatSetOption()
2259 @XXXXX*/
2260 
2261 
2262 #undef __FUNCT__
2263 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2264 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2265 {
2266   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2267   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2268   PetscReal      atmp;
2269   PetscReal      *work,*svalues,*rvalues;
2270   PetscErrorCode ierr;
2271   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2272   PetscMPIInt    rank,size;
2273   PetscInt       *rowners_bs,dest,count,source;
2274   PetscScalar    *va;
2275   MatScalar      *ba;
2276   MPI_Status     stat;
2277 
2278   PetscFunctionBegin;
2279   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2280   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2281 
2282   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2283   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2284 
2285   bs   = A->bs;
2286   mbs  = a->mbs;
2287   Mbs  = a->Mbs;
2288   ba   = b->a;
2289   bi   = b->i;
2290   bj   = b->j;
2291 
2292   /* find ownerships */
2293   rowners_bs = a->rowners_bs;
2294 
2295   /* each proc creates an array to be distributed */
2296   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2297   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2298 
2299   /* row_max for B */
2300   if (rank != size-1){
2301     for (i=0; i<mbs; i++) {
2302       ncols = bi[1] - bi[0]; bi++;
2303       brow  = bs*i;
2304       for (j=0; j<ncols; j++){
2305         bcol = bs*(*bj);
2306         for (kcol=0; kcol<bs; kcol++){
2307           col = bcol + kcol;                 /* local col index */
2308           col += rowners_bs[rank+1];      /* global col index */
2309           for (krow=0; krow<bs; krow++){
2310             atmp = PetscAbsScalar(*ba); ba++;
2311             row = brow + krow;    /* local row index */
2312             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2313             if (work[col] < atmp) work[col] = atmp;
2314           }
2315         }
2316         bj++;
2317       }
2318     }
2319 
2320     /* send values to its owners */
2321     for (dest=rank+1; dest<size; dest++){
2322       svalues = work + rowners_bs[dest];
2323       count   = rowners_bs[dest+1]-rowners_bs[dest];
2324       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2325     }
2326   }
2327 
2328   /* receive values */
2329   if (rank){
2330     rvalues = work;
2331     count   = rowners_bs[rank+1]-rowners_bs[rank];
2332     for (source=0; source<rank; source++){
2333       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2334       /* process values */
2335       for (i=0; i<count; i++){
2336         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2337       }
2338     }
2339   }
2340 
2341   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2342   ierr = PetscFree(work);CHKERRQ(ierr);
2343   PetscFunctionReturn(0);
2344 }
2345 
2346 #undef __FUNCT__
2347 #define __FUNCT__ "MatRelax_MPISBAIJ"
2348 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2349 {
2350   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2351   PetscErrorCode ierr;
2352   PetscInt       mbs=mat->mbs,bs=matin->bs;
2353   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2354   Vec            bb1;
2355 
2356   PetscFunctionBegin;
2357   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2358   if (bs > 1)
2359     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2360 
2361   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2362     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2363       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2364       its--;
2365     }
2366 
2367     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2368     while (its--){
2369 
2370       /* lower triangular part: slvec0b = - B^T*xx */
2371       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2372 
2373       /* copy xx into slvec0a */
2374       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2375       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2376       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2377       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2378 
2379       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2380 
2381       /* copy bb into slvec1a */
2382       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2383       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2384       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2385       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2386 
2387       /* set slvec1b = 0 */
2388       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2389 
2390       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2391       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2392       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2393       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2394 
2395       /* upper triangular part: bb1 = bb1 - B*x */
2396       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2397 
2398       /* local diagonal sweep */
2399       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2400     }
2401     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2402   } else {
2403     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2404   }
2405   PetscFunctionReturn(0);
2406 }
2407 
2408 #undef __FUNCT__
2409 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2410 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2411 {
2412   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2413   PetscErrorCode ierr;
2414   PetscScalar    mone=-1.0;
2415   Vec            lvec1,bb1;
2416 
2417   PetscFunctionBegin;
2418   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2419   if (matin->bs > 1)
2420     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2421 
2422   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2423     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2424       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2425       its--;
2426     }
2427 
2428     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2429     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2430     while (its--){
2431       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2432 
2433       /* lower diagonal part: bb1 = bb - B^T*xx */
2434       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2435       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2436 
2437       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2438       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2439       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2440 
2441       /* upper diagonal part: bb1 = bb1 - B*x */
2442       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2443       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2444 
2445       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2446 
2447       /* diagonal sweep */
2448       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2449     }
2450     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2451     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2452   } else {
2453     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2454   }
2455   PetscFunctionReturn(0);
2456 }
2457 
2458