xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision cb5d8e9ef36bd5726d5c4e38d977490d990864c9)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (PetscInt*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (PetscInt*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #endif
279 
280 /* Only add/insert a(i,j) with i<=j (blocks).
281    Any a(i,j) with i>j input by user is ingored.
282 */
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
285 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
288   MatScalar      value;
289   PetscTruth     roworiented = baij->roworiented;
290   PetscErrorCode ierr;
291   PetscInt       i,j,row,col;
292   PetscInt       rstart_orig=baij->rstart_bs;
293   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
294   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
295 
296   /* Some Variables required in the macro */
297   Mat            A = baij->A;
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
299   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
300   MatScalar      *aa=a->a;
301 
302   Mat            B = baij->B;
303   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
304   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
305   MatScalar     *ba=b->a;
306 
307   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
308   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
309   MatScalar     *ap,*bap;
310 
311   /* for stash */
312   PetscInt      n_loc, *in_loc=0;
313   MatScalar     *v_loc=0;
314 
315   PetscFunctionBegin;
316 
317   if(!baij->donotstash){
318     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
319     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
320   }
321 
322   for (i=0; i<m; i++) {
323     if (im[i] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
326 #endif
327     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
328       row = im[i] - rstart_orig;              /* local row index */
329       for (j=0; j<n; j++) {
330         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
331         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
332           col = in[j] - cstart_orig;          /* local col index */
333           brow = row/bs; bcol = col/bs;
334           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
335           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
336           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
337           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
338         } else if (in[j] < 0) continue;
339 #if defined(PETSC_USE_DEBUG)
340         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
341 #endif
342         else {  /* off-diag entry (B) */
343           if (mat->was_assembled) {
344             if (!baij->colmap) {
345               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
346             }
347 #if defined (PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
349             col  = col - 1;
350 #else
351             col = baij->colmap[in[j]/bs] - 1;
352 #endif
353             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
354               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
355               col =  in[j];
356               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
357               B = baij->B;
358               b = (Mat_SeqBAIJ*)(B)->data;
359               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
360               ba=b->a;
361             } else col += in[j]%bs;
362           } else col = in[j];
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         }
367       }
368     } else {  /* off processor entry */
369       if (!baij->donotstash) {
370         n_loc = 0;
371         for (j=0; j<n; j++){
372           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
373           in_loc[n_loc] = in[j];
374           if (roworiented) {
375             v_loc[n_loc] = v[i*n+j];
376           } else {
377             v_loc[n_loc] = v[j*m+i];
378           }
379           n_loc++;
380         }
381         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
382       }
383     }
384   }
385 
386   if(!baij->donotstash){
387     ierr = PetscFree(in_loc);CHKERRQ(ierr);
388     ierr = PetscFree(v_loc);CHKERRQ(ierr);
389   }
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
395 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
396 {
397   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
398   const MatScalar *value;
399   MatScalar       *barray=baij->barray;
400   PetscTruth      roworiented = baij->roworiented;
401   PetscErrorCode  ierr;
402   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
403   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
404   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
405 
406   PetscFunctionBegin;
407   if(!barray) {
408     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
409     baij->barray = barray;
410   }
411 
412   if (roworiented) {
413     stepval = (n-1)*bs;
414   } else {
415     stepval = (m-1)*bs;
416   }
417   for (i=0; i<m; i++) {
418     if (im[i] < 0) continue;
419 #if defined(PETSC_USE_DEBUG)
420     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
421 #endif
422     if (im[i] >= rstart && im[i] < rend) {
423       row = im[i] - rstart;
424       for (j=0; j<n; j++) {
425         /* If NumCol = 1 then a copy is not required */
426         if ((roworiented) && (n == 1)) {
427           barray = (MatScalar*) v + i*bs2;
428         } else if((!roworiented) && (m == 1)) {
429           barray = (MatScalar*) v + j*bs2;
430         } else { /* Here a copy is required */
431           if (roworiented) {
432             value = v + i*(stepval+bs)*bs + j*bs;
433           } else {
434             value = v + j*(stepval+bs)*bs + i*bs;
435           }
436           for (ii=0; ii<bs; ii++,value+=stepval) {
437             for (jj=0; jj<bs; jj++) {
438               *barray++  = *value++;
439             }
440           }
441           barray -=bs2;
442         }
443 
444         if (in[j] >= cstart && in[j] < cend){
445           col  = in[j] - cstart;
446           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
447         }
448         else if (in[j] < 0) continue;
449 #if defined(PETSC_USE_DEBUG)
450         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
451 #endif
452         else {
453           if (mat->was_assembled) {
454             if (!baij->colmap) {
455               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
456             }
457 
458 #if defined(PETSC_USE_DEBUG)
459 #if defined (PETSC_USE_CTABLE)
460             { PetscInt data;
461               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
462               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463             }
464 #else
465             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
466 #endif
467 #endif
468 #if defined (PETSC_USE_CTABLE)
469 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
470             col  = (col - 1)/bs;
471 #else
472             col = (baij->colmap[in[j]] - 1)/bs;
473 #endif
474             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
475               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
476               col =  in[j];
477             }
478           }
479           else col = in[j];
480           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482       }
483     } else {
484       if (!baij->donotstash) {
485         if (roworiented) {
486           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487         } else {
488           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatGetValues_MPISBAIJ"
498 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
499 {
500   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
501   PetscErrorCode ierr;
502   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
503   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
504 
505   PetscFunctionBegin;
506   for (i=0; i<m; i++) {
507     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
508     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
509     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
510       row = idxm[i] - bsrstart;
511       for (j=0; j<n; j++) {
512         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
513         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
514         if (idxn[j] >= bscstart && idxn[j] < bscend){
515           col = idxn[j] - bscstart;
516           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
517         } else {
518           if (!baij->colmap) {
519             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
520           }
521 #if defined (PETSC_USE_CTABLE)
522           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
523           data --;
524 #else
525           data = baij->colmap[idxn[j]/bs]-1;
526 #endif
527           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
528           else {
529             col  = data + idxn[j]%bs;
530             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
531           }
532         }
533       }
534     } else {
535       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
536     }
537   }
538  PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatNorm_MPISBAIJ"
543 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
544 {
545   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
546   PetscErrorCode ierr;
547   PetscReal      sum[2],*lnorm2;
548 
549   PetscFunctionBegin;
550   if (baij->size == 1) {
551     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
552   } else {
553     if (type == NORM_FROBENIUS) {
554       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
555       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
556       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
557       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
558       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
559       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
560       *norm = sqrt(sum[0] + 2*sum[1]);
561       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
562     } else {
563       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
564     }
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
571 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
572 {
573   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
574   PetscErrorCode ierr;
575   PetscInt       nstash,reallocs;
576   InsertMode     addv;
577 
578   PetscFunctionBegin;
579   if (baij->donotstash) {
580     PetscFunctionReturn(0);
581   }
582 
583   /* make sure all processors are either in INSERTMODE or ADDMODE */
584   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
585   if (addv == (ADD_VALUES|INSERT_VALUES)) {
586     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
587   }
588   mat->insertmode = addv; /* in case this processor had no cache */
589 
590   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
591   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
592   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
593   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
594   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
595   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
601 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
602 {
603   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
604   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
605   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
606   PetscErrorCode ierr;
607   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
608   PetscInt       *row,*col,other_disassembled;
609   PetscMPIInt    n;
610   PetscTruth     r1,r2,r3;
611   MatScalar      *val;
612   InsertMode     addv = mat->insertmode;
613 
614   PetscFunctionBegin;
615 
616   if (!baij->donotstash) {
617     while (1) {
618       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
619       if (!flg) break;
620 
621       for (i=0; i<n;) {
622         /* Now identify the consecutive vals belonging to the same row */
623         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
624         if (j < n) ncols = j-i;
625         else       ncols = n-i;
626         /* Now assemble all these values with a single function call */
627         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
628         i = j;
629       }
630     }
631     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
632     /* Now process the block-stash. Since the values are stashed column-oriented,
633        set the roworiented flag to column oriented, and after MatSetValues()
634        restore the original flags */
635     r1 = baij->roworiented;
636     r2 = a->roworiented;
637     r3 = b->roworiented;
638     baij->roworiented = PETSC_FALSE;
639     a->roworiented    = PETSC_FALSE;
640     b->roworiented    = PETSC_FALSE;
641     while (1) {
642       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
643       if (!flg) break;
644 
645       for (i=0; i<n;) {
646         /* Now identify the consecutive vals belonging to the same row */
647         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
648         if (j < n) ncols = j-i;
649         else       ncols = n-i;
650         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
651         i = j;
652       }
653     }
654     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
655     baij->roworiented = r1;
656     a->roworiented    = r2;
657     b->roworiented    = r3;
658   }
659 
660   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
661   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
662 
663   /* determine if any processor has disassembled, if so we must
664      also disassemble ourselfs, in order that we may reassemble. */
665   /*
666      if nonzero structure of submatrix B cannot change then we know that
667      no processor disassembled thus we can skip this stuff
668   */
669   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
670     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
671     if (mat->was_assembled && !other_disassembled) {
672       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
673     }
674   }
675 
676   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
677     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
678   }
679   /* b->compressedrow.use = PETSC_TRUE; -- cannot be used on baij->B without customization,
680         because MatMult_MPISBAIJ uses very different algorithm than MatMult_MPIBAIJ() */
681   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
682   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
683 
684   if (baij->rowvalues) {
685     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
686     baij->rowvalues = 0;
687   }
688 
689   PetscFunctionReturn(0);
690 }
691 
692 #undef __FUNCT__
693 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
694 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
695 {
696   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
697   PetscErrorCode    ierr;
698   PetscInt          bs = mat->bs;
699   PetscMPIInt       size = baij->size,rank = baij->rank;
700   PetscTruth        iascii,isdraw;
701   PetscViewer       sviewer;
702   PetscViewerFormat format;
703 
704   PetscFunctionBegin;
705   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
706   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
707   if (iascii) {
708     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
709     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
710       MatInfo info;
711       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
712       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
713       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
714               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
715               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
716       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
717       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
718       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
719       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
720       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
721       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
722       PetscFunctionReturn(0);
723     } else if (format == PETSC_VIEWER_ASCII_INFO) {
724       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
725       PetscFunctionReturn(0);
726     }
727   }
728 
729   if (isdraw) {
730     PetscDraw       draw;
731     PetscTruth isnull;
732     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
733     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
734   }
735 
736   if (size == 1) {
737     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
738     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
739   } else {
740     /* assemble the entire matrix onto first processor. */
741     Mat         A;
742     Mat_SeqSBAIJ *Aloc;
743     Mat_SeqBAIJ *Bloc;
744     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
745     MatScalar   *a;
746 
747     /* Should this be the same type as mat? */
748     if (!rank) {
749       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
750     } else {
751       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
752     }
753     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
754     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
755     PetscLogObjectParent(mat,A);
756 
757     /* copy over the A part */
758     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
759     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
760     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
761 
762     for (i=0; i<mbs; i++) {
763       rvals[0] = bs*(baij->rstart + i);
764       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
765       for (j=ai[i]; j<ai[i+1]; j++) {
766         col = (baij->cstart+aj[j])*bs;
767         for (k=0; k<bs; k++) {
768           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
769           col++; a += bs;
770         }
771       }
772     }
773     /* copy over the B part */
774     Bloc = (Mat_SeqBAIJ*)baij->B->data;
775     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
776     for (i=0; i<mbs; i++) {
777       rvals[0] = bs*(baij->rstart + i);
778       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
779       for (j=ai[i]; j<ai[i+1]; j++) {
780         col = baij->garray[aj[j]]*bs;
781         for (k=0; k<bs; k++) {
782           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
783           col++; a += bs;
784         }
785       }
786     }
787     ierr = PetscFree(rvals);CHKERRQ(ierr);
788     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
789     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
790     /*
791        Everyone has to call to draw the matrix since the graphics waits are
792        synchronized across all processors that share the PetscDraw object
793     */
794     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
795     if (!rank) {
796       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
797       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
798     }
799     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
800     ierr = MatDestroy(A);CHKERRQ(ierr);
801   }
802   PetscFunctionReturn(0);
803 }
804 
805 #undef __FUNCT__
806 #define __FUNCT__ "MatView_MPISBAIJ"
807 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
808 {
809   PetscErrorCode ierr;
810   PetscTruth     iascii,isdraw,issocket,isbinary;
811 
812   PetscFunctionBegin;
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
814   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
815   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
816   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
817   if (iascii || isdraw || issocket || isbinary) {
818     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
819   } else {
820     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
821   }
822   PetscFunctionReturn(0);
823 }
824 
825 #undef __FUNCT__
826 #define __FUNCT__ "MatDestroy_MPISBAIJ"
827 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
828 {
829   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
830   PetscErrorCode ierr;
831 
832   PetscFunctionBegin;
833 #if defined(PETSC_USE_LOG)
834   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
835 #endif
836   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
837   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
838   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
839   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
840   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
841 #if defined (PETSC_USE_CTABLE)
842   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
843 #else
844   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
845 #endif
846   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
847   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
848   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
849   if (baij->slvec0) {
850     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
851     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
852   }
853   if (baij->slvec1) {
854     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
855     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
856     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
857   }
858   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
859   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
860   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
861   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
862 #if defined(PETSC_USE_MAT_SINGLE)
863   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
864 #endif
865   ierr = PetscFree(baij);CHKERRQ(ierr);
866 
867   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
868   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
869   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
870   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatMult_MPISBAIJ"
876 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
877 {
878   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
879   PetscErrorCode ierr;
880   PetscInt       nt,mbs=a->mbs,bs=A->bs;
881   PetscScalar    *x,*from,zero=0.0;
882 
883   PetscFunctionBegin;
884   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
885   if (nt != A->n) {
886     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
887   }
888   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
889   if (nt != A->m) {
890     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
891   }
892 
893   /* diagonal part */
894   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
895   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
896 
897   /* subdiagonal part */
898   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
899 
900   /* copy x into the vec slvec0 */
901   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
902   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
903   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
904   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
905 
906   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
907   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
908   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
909 
910   /* supperdiagonal part */
911   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
912 
913   PetscFunctionReturn(0);
914 }
915 
916 #undef __FUNCT__
917 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
918 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
919 {
920   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
921   PetscErrorCode ierr;
922   PetscInt       nt;
923 
924   PetscFunctionBegin;
925   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
926   if (nt != A->n) {
927     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
928   }
929   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
930   if (nt != A->m) {
931     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
932   }
933 
934   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
935   /* do diagonal part */
936   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
937   /* do supperdiagonal part */
938   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
939   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
940   /* do subdiagonal part */
941   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
942   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
943   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
944 
945   PetscFunctionReturn(0);
946 }
947 
948 #undef __FUNCT__
949 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
950 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
951 {
952   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
953   PetscErrorCode ierr;
954   PetscInt       mbs=a->mbs,bs=A->bs;
955   PetscScalar    *x,*from,zero=0.0;
956 
957   PetscFunctionBegin;
958   /*
959   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
960   PetscSynchronizedFlush(A->comm);
961   */
962   /* diagonal part */
963   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
964   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
965 
966   /* subdiagonal part */
967   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
968 
969   /* copy x into the vec slvec0 */
970   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
971   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
972   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
973   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
974 
975   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
976   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
977   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
978 
979   /* supperdiagonal part */
980   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 #undef __FUNCT__
986 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
987 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
988 {
989   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
990   PetscErrorCode ierr;
991 
992   PetscFunctionBegin;
993   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
994   /* do diagonal part */
995   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
996   /* do supperdiagonal part */
997   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
998   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
999 
1000   /* do subdiagonal part */
1001   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1002   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1003   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1004 
1005   PetscFunctionReturn(0);
1006 }
1007 
1008 #undef __FUNCT__
1009 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1010 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1011 {
1012   PetscErrorCode ierr;
1013 
1014   PetscFunctionBegin;
1015   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1016   PetscFunctionReturn(0);
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1021 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1022 {
1023   PetscErrorCode ierr;
1024 
1025   PetscFunctionBegin;
1026   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1027   PetscFunctionReturn(0);
1028 }
1029 
1030 /*
1031   This only works correctly for square matrices where the subblock A->A is the
1032    diagonal block
1033 */
1034 #undef __FUNCT__
1035 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1036 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1037 {
1038   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040 
1041   PetscFunctionBegin;
1042   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1043   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1044   PetscFunctionReturn(0);
1045 }
1046 
1047 #undef __FUNCT__
1048 #define __FUNCT__ "MatScale_MPISBAIJ"
1049 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1050 {
1051   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1052   PetscErrorCode ierr;
1053 
1054   PetscFunctionBegin;
1055   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1056   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1057   PetscFunctionReturn(0);
1058 }
1059 
1060 #undef __FUNCT__
1061 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1062 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1063 {
1064   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1065   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1066   PetscErrorCode ierr;
1067   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1068   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1069   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1070 
1071   PetscFunctionBegin;
1072   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1073   mat->getrowactive = PETSC_TRUE;
1074 
1075   if (!mat->rowvalues && (idx || v)) {
1076     /*
1077         allocate enough space to hold information from the longest row.
1078     */
1079     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1080     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1081     PetscInt     max = 1,mbs = mat->mbs,tmp;
1082     for (i=0; i<mbs; i++) {
1083       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1084       if (max < tmp) { max = tmp; }
1085     }
1086     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1087     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1088   }
1089 
1090   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1091   lrow = row - brstart;  /* local row index */
1092 
1093   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1094   if (!v)   {pvA = 0; pvB = 0;}
1095   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1096   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1097   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1098   nztot = nzA + nzB;
1099 
1100   cmap  = mat->garray;
1101   if (v  || idx) {
1102     if (nztot) {
1103       /* Sort by increasing column numbers, assuming A and B already sorted */
1104       PetscInt imark = -1;
1105       if (v) {
1106         *v = v_p = mat->rowvalues;
1107         for (i=0; i<nzB; i++) {
1108           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1109           else break;
1110         }
1111         imark = i;
1112         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1113         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1114       }
1115       if (idx) {
1116         *idx = idx_p = mat->rowindices;
1117         if (imark > -1) {
1118           for (i=0; i<imark; i++) {
1119             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1120           }
1121         } else {
1122           for (i=0; i<nzB; i++) {
1123             if (cmap[cworkB[i]/bs] < cstart)
1124               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1125             else break;
1126           }
1127           imark = i;
1128         }
1129         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1130         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1131       }
1132     } else {
1133       if (idx) *idx = 0;
1134       if (v)   *v   = 0;
1135     }
1136   }
1137   *nz = nztot;
1138   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1139   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1145 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1146 {
1147   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1148 
1149   PetscFunctionBegin;
1150   if (baij->getrowactive == PETSC_FALSE) {
1151     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1152   }
1153   baij->getrowactive = PETSC_FALSE;
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1159 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1160 {
1161   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1162   PetscErrorCode ierr;
1163 
1164   PetscFunctionBegin;
1165   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1166   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1172 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1173 {
1174   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1175   Mat            A = a->A,B = a->B;
1176   PetscErrorCode ierr;
1177   PetscReal      isend[5],irecv[5];
1178 
1179   PetscFunctionBegin;
1180   info->block_size     = (PetscReal)matin->bs;
1181   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1182   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1183   isend[3] = info->memory;  isend[4] = info->mallocs;
1184   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1185   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1186   isend[3] += info->memory;  isend[4] += info->mallocs;
1187   if (flag == MAT_LOCAL) {
1188     info->nz_used      = isend[0];
1189     info->nz_allocated = isend[1];
1190     info->nz_unneeded  = isend[2];
1191     info->memory       = isend[3];
1192     info->mallocs      = isend[4];
1193   } else if (flag == MAT_GLOBAL_MAX) {
1194     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1195     info->nz_used      = irecv[0];
1196     info->nz_allocated = irecv[1];
1197     info->nz_unneeded  = irecv[2];
1198     info->memory       = irecv[3];
1199     info->mallocs      = irecv[4];
1200   } else if (flag == MAT_GLOBAL_SUM) {
1201     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1202     info->nz_used      = irecv[0];
1203     info->nz_allocated = irecv[1];
1204     info->nz_unneeded  = irecv[2];
1205     info->memory       = irecv[3];
1206     info->mallocs      = irecv[4];
1207   } else {
1208     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1209   }
1210   info->rows_global       = (PetscReal)A->M;
1211   info->columns_global    = (PetscReal)A->N;
1212   info->rows_local        = (PetscReal)A->m;
1213   info->columns_local     = (PetscReal)A->N;
1214   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1215   info->fill_ratio_needed = 0;
1216   info->factor_mallocs    = 0;
1217   PetscFunctionReturn(0);
1218 }
1219 
1220 #undef __FUNCT__
1221 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1222 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1223 {
1224   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1225   PetscErrorCode ierr;
1226 
1227   PetscFunctionBegin;
1228   switch (op) {
1229   case MAT_NO_NEW_NONZERO_LOCATIONS:
1230   case MAT_YES_NEW_NONZERO_LOCATIONS:
1231   case MAT_COLUMNS_UNSORTED:
1232   case MAT_COLUMNS_SORTED:
1233   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1234   case MAT_KEEP_ZEROED_ROWS:
1235   case MAT_NEW_NONZERO_LOCATION_ERR:
1236     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1237     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1238     break;
1239   case MAT_ROW_ORIENTED:
1240     a->roworiented = PETSC_TRUE;
1241     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1242     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1243     break;
1244   case MAT_ROWS_SORTED:
1245   case MAT_ROWS_UNSORTED:
1246   case MAT_YES_NEW_DIAGONALS:
1247     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1248     break;
1249   case MAT_COLUMN_ORIENTED:
1250     a->roworiented = PETSC_FALSE;
1251     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1252     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1253     break;
1254   case MAT_IGNORE_OFF_PROC_ENTRIES:
1255     a->donotstash = PETSC_TRUE;
1256     break;
1257   case MAT_NO_NEW_DIAGONALS:
1258     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1259   case MAT_USE_HASH_TABLE:
1260     a->ht_flag = PETSC_TRUE;
1261     break;
1262   case MAT_NOT_SYMMETRIC:
1263   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1264   case MAT_HERMITIAN:
1265     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1266   case MAT_SYMMETRIC:
1267   case MAT_STRUCTURALLY_SYMMETRIC:
1268   case MAT_NOT_HERMITIAN:
1269   case MAT_SYMMETRY_ETERNAL:
1270   case MAT_NOT_SYMMETRY_ETERNAL:
1271     break;
1272   default:
1273     SETERRQ(PETSC_ERR_SUP,"unknown option");
1274   }
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 #undef __FUNCT__
1279 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1280 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1281 {
1282   PetscErrorCode ierr;
1283   PetscFunctionBegin;
1284   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1285   PetscFunctionReturn(0);
1286 }
1287 
1288 #undef __FUNCT__
1289 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1290 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1291 {
1292   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1293   Mat            a = baij->A,b = baij->B;
1294   PetscErrorCode ierr;
1295   PetscInt       s1,s2,s3;
1296 
1297   PetscFunctionBegin;
1298   if (ll != rr) {
1299     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1300   }
1301   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1302   if (rr) {
1303     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1304     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1305     /* Overlap communication with computation. */
1306     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1307     /*} if (ll) { */
1308     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1309     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1310     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1311     /* } */
1312   /* scale  the diagonal block */
1313   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1314 
1315   /* if (rr) { */
1316     /* Do a scatter end and then right scale the off-diagonal block */
1317     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1318     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1319   }
1320 
1321   PetscFunctionReturn(0);
1322 }
1323 
1324 #undef __FUNCT__
1325 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1326 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1327 {
1328   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1329   MPI_Comm          comm = A->comm;
1330   static PetscTruth called = PETSC_FALSE;
1331   PetscErrorCode    ierr;
1332 
1333   PetscFunctionBegin;
1334   if (!a->rank) {
1335     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1336   }
1337   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1338   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1339   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1340   PetscFunctionReturn(0);
1341 }
1342 
1343 #undef __FUNCT__
1344 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1345 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1346 {
1347   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1348   PetscErrorCode ierr;
1349 
1350   PetscFunctionBegin;
1351   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatEqual_MPISBAIJ"
1359 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1360 {
1361   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1362   Mat            a,b,c,d;
1363   PetscTruth     flg;
1364   PetscErrorCode ierr;
1365 
1366   PetscFunctionBegin;
1367   a = matA->A; b = matA->B;
1368   c = matB->A; d = matB->B;
1369 
1370   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1371   if (flg == PETSC_TRUE) {
1372     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1373   }
1374   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 #undef __FUNCT__
1379 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1380 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1381 {
1382   PetscErrorCode ierr;
1383 
1384   PetscFunctionBegin;
1385   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1386   PetscFunctionReturn(0);
1387 }
1388 
1389 #undef __FUNCT__
1390 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1391 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1392 {
1393   PetscErrorCode ierr;
1394   PetscInt       i;
1395   PetscTruth     flg;
1396 
1397   PetscFunctionBegin;
1398   for (i=0; i<n; i++) {
1399     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1400     if (!flg) {
1401       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1402     }
1403   }
1404   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1405   PetscFunctionReturn(0);
1406 }
1407 
1408 
1409 /* -------------------------------------------------------------------*/
1410 static struct _MatOps MatOps_Values = {
1411        MatSetValues_MPISBAIJ,
1412        MatGetRow_MPISBAIJ,
1413        MatRestoreRow_MPISBAIJ,
1414        MatMult_MPISBAIJ,
1415 /* 4*/ MatMultAdd_MPISBAIJ,
1416        MatMultTranspose_MPISBAIJ,
1417        MatMultTransposeAdd_MPISBAIJ,
1418        0,
1419        0,
1420        0,
1421 /*10*/ 0,
1422        0,
1423        0,
1424        MatRelax_MPISBAIJ,
1425        MatTranspose_MPISBAIJ,
1426 /*15*/ MatGetInfo_MPISBAIJ,
1427        MatEqual_MPISBAIJ,
1428        MatGetDiagonal_MPISBAIJ,
1429        MatDiagonalScale_MPISBAIJ,
1430        MatNorm_MPISBAIJ,
1431 /*20*/ MatAssemblyBegin_MPISBAIJ,
1432        MatAssemblyEnd_MPISBAIJ,
1433        0,
1434        MatSetOption_MPISBAIJ,
1435        MatZeroEntries_MPISBAIJ,
1436 /*25*/ 0,
1437        0,
1438        0,
1439        0,
1440        0,
1441 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1442        0,
1443        0,
1444        0,
1445        0,
1446 /*35*/ MatDuplicate_MPISBAIJ,
1447        0,
1448        0,
1449        0,
1450        0,
1451 /*40*/ 0,
1452        MatGetSubMatrices_MPISBAIJ,
1453        MatIncreaseOverlap_MPISBAIJ,
1454        MatGetValues_MPISBAIJ,
1455        0,
1456 /*45*/ MatPrintHelp_MPISBAIJ,
1457        MatScale_MPISBAIJ,
1458        0,
1459        0,
1460        0,
1461 /*50*/ 0,
1462        0,
1463        0,
1464        0,
1465        0,
1466 /*55*/ 0,
1467        0,
1468        MatSetUnfactored_MPISBAIJ,
1469        0,
1470        MatSetValuesBlocked_MPISBAIJ,
1471 /*60*/ 0,
1472        0,
1473        0,
1474        MatGetPetscMaps_Petsc,
1475        0,
1476 /*65*/ 0,
1477        0,
1478        0,
1479        0,
1480        0,
1481 /*70*/ MatGetRowMax_MPISBAIJ,
1482        0,
1483        0,
1484        0,
1485        0,
1486 /*75*/ 0,
1487        0,
1488        0,
1489        0,
1490        0,
1491 /*80*/ 0,
1492        0,
1493        0,
1494        0,
1495        MatLoad_MPISBAIJ,
1496 /*85*/ 0,
1497        0,
1498        0,
1499        0,
1500        0,
1501 /*90*/ 0,
1502        0,
1503        0,
1504        0,
1505        0,
1506 /*95*/ 0,
1507        0,
1508        0,
1509        0};
1510 
1511 
1512 EXTERN_C_BEGIN
1513 #undef __FUNCT__
1514 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1515 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1516 {
1517   PetscFunctionBegin;
1518   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1519   *iscopy = PETSC_FALSE;
1520   PetscFunctionReturn(0);
1521 }
1522 EXTERN_C_END
1523 
1524 EXTERN_C_BEGIN
1525 #undef __FUNCT__
1526 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1527 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1528 {
1529   Mat_MPISBAIJ   *b;
1530   PetscErrorCode ierr;
1531   PetscInt       i,mbs,Mbs;
1532 
1533   PetscFunctionBegin;
1534   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1535 
1536   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1537   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1538   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1539   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1540   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1541   if (d_nnz) {
1542     for (i=0; i<B->m/bs; i++) {
1543       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1544     }
1545   }
1546   if (o_nnz) {
1547     for (i=0; i<B->m/bs; i++) {
1548       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1549     }
1550   }
1551   B->preallocated = PETSC_TRUE;
1552   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1553   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1554   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1555   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1556 
1557   b   = (Mat_MPISBAIJ*)B->data;
1558   mbs = B->m/bs;
1559   Mbs = B->M/bs;
1560   if (mbs*bs != B->m) {
1561     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1562   }
1563 
1564   B->bs  = bs;
1565   b->bs2 = bs*bs;
1566   b->mbs = mbs;
1567   b->nbs = mbs;
1568   b->Mbs = Mbs;
1569   b->Nbs = Mbs;
1570 
1571   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1572   b->rowners[0]    = 0;
1573   for (i=2; i<=b->size; i++) {
1574     b->rowners[i] += b->rowners[i-1];
1575   }
1576   b->rstart    = b->rowners[b->rank];
1577   b->rend      = b->rowners[b->rank+1];
1578   b->cstart    = b->rstart;
1579   b->cend      = b->rend;
1580   for (i=0; i<=b->size; i++) {
1581     b->rowners_bs[i] = b->rowners[i]*bs;
1582   }
1583   b->rstart_bs = b-> rstart*bs;
1584   b->rend_bs   = b->rend*bs;
1585 
1586   b->cstart_bs = b->cstart*bs;
1587   b->cend_bs   = b->cend*bs;
1588 
1589   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1590   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1591   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1592   PetscLogObjectParent(B,b->A);
1593 
1594   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1595   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1596   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1597   PetscLogObjectParent(B,b->B);
1598 
1599   /* build cache for off array entries formed */
1600   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1601 
1602   PetscFunctionReturn(0);
1603 }
1604 EXTERN_C_END
1605 
1606 /*MC
1607    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1608    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1609 
1610    Options Database Keys:
1611 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1612 
1613   Level: beginner
1614 
1615 .seealso: MatCreateMPISBAIJ
1616 M*/
1617 
1618 EXTERN_C_BEGIN
1619 #undef __FUNCT__
1620 #define __FUNCT__ "MatCreate_MPISBAIJ"
1621 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1622 {
1623   Mat_MPISBAIJ   *b;
1624   PetscErrorCode ierr;
1625   PetscTruth     flg;
1626 
1627   PetscFunctionBegin;
1628 
1629   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1630   B->data = (void*)b;
1631   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1632 
1633   B->ops->destroy    = MatDestroy_MPISBAIJ;
1634   B->ops->view       = MatView_MPISBAIJ;
1635   B->mapping    = 0;
1636   B->factor     = 0;
1637   B->assembled  = PETSC_FALSE;
1638 
1639   B->insertmode = NOT_SET_VALUES;
1640   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1641   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1642 
1643   /* build local table of row and column ownerships */
1644   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1645   b->cowners    = b->rowners + b->size + 2;
1646   b->rowners_bs = b->cowners + b->size + 2;
1647   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1648 
1649   /* build cache for off array entries formed */
1650   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1651   b->donotstash  = PETSC_FALSE;
1652   b->colmap      = PETSC_NULL;
1653   b->garray      = PETSC_NULL;
1654   b->roworiented = PETSC_TRUE;
1655 
1656 #if defined(PETSC_USE_MAT_SINGLE)
1657   /* stuff for MatSetValues_XXX in single precision */
1658   b->setvalueslen     = 0;
1659   b->setvaluescopy    = PETSC_NULL;
1660 #endif
1661 
1662   /* stuff used in block assembly */
1663   b->barray       = 0;
1664 
1665   /* stuff used for matrix vector multiply */
1666   b->lvec         = 0;
1667   b->Mvctx        = 0;
1668   b->slvec0       = 0;
1669   b->slvec0b      = 0;
1670   b->slvec1       = 0;
1671   b->slvec1a      = 0;
1672   b->slvec1b      = 0;
1673   b->sMvctx       = 0;
1674 
1675   /* stuff for MatGetRow() */
1676   b->rowindices   = 0;
1677   b->rowvalues    = 0;
1678   b->getrowactive = PETSC_FALSE;
1679 
1680   /* hash table stuff */
1681   b->ht           = 0;
1682   b->hd           = 0;
1683   b->ht_size      = 0;
1684   b->ht_flag      = PETSC_FALSE;
1685   b->ht_fact      = 0;
1686   b->ht_total_ct  = 0;
1687   b->ht_insert_ct = 0;
1688 
1689   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1690   if (flg) {
1691     PetscReal fact = 1.39;
1692     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1693     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1694     if (fact <= 1.0) fact = 1.39;
1695     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1696     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1697   }
1698   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1699                                      "MatStoreValues_MPISBAIJ",
1700                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1701   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1702                                      "MatRetrieveValues_MPISBAIJ",
1703                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1704   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1705                                      "MatGetDiagonalBlock_MPISBAIJ",
1706                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1707   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1708                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1709                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1710   B->symmetric                  = PETSC_TRUE;
1711   B->structurally_symmetric     = PETSC_TRUE;
1712   B->symmetric_set              = PETSC_TRUE;
1713   B->structurally_symmetric_set = PETSC_TRUE;
1714   PetscFunctionReturn(0);
1715 }
1716 EXTERN_C_END
1717 
1718 /*MC
1719    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1720 
1721    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1722    and MATMPISBAIJ otherwise.
1723 
1724    Options Database Keys:
1725 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1726 
1727   Level: beginner
1728 
1729 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1730 M*/
1731 
1732 EXTERN_C_BEGIN
1733 #undef __FUNCT__
1734 #define __FUNCT__ "MatCreate_SBAIJ"
1735 PetscErrorCode MatCreate_SBAIJ(Mat A)
1736 {
1737   PetscErrorCode ierr;
1738   PetscMPIInt    size;
1739 
1740   PetscFunctionBegin;
1741   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1742   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1743   if (size == 1) {
1744     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1745   } else {
1746     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1747   }
1748   PetscFunctionReturn(0);
1749 }
1750 EXTERN_C_END
1751 
1752 #undef __FUNCT__
1753 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1754 /*@C
1755    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1756    the user should preallocate the matrix storage by setting the parameters
1757    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1758    performance can be increased by more than a factor of 50.
1759 
1760    Collective on Mat
1761 
1762    Input Parameters:
1763 +  A - the matrix
1764 .  bs   - size of blockk
1765 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1766            submatrix  (same for all local rows)
1767 .  d_nnz - array containing the number of block nonzeros in the various block rows
1768            in the upper triangular and diagonal part of the in diagonal portion of the local
1769            (possibly different for each block row) or PETSC_NULL.  You must leave room
1770            for the diagonal entry even if it is zero.
1771 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1772            submatrix (same for all local rows).
1773 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1774            off-diagonal portion of the local submatrix (possibly different for
1775            each block row) or PETSC_NULL.
1776 
1777 
1778    Options Database Keys:
1779 .   -mat_no_unroll - uses code that does not unroll the loops in the
1780                      block calculations (much slower)
1781 .   -mat_block_size - size of the blocks to use
1782 
1783    Notes:
1784 
1785    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1786    than it must be used on all processors that share the object for that argument.
1787 
1788    If the *_nnz parameter is given then the *_nz parameter is ignored
1789 
1790    Storage Information:
1791    For a square global matrix we define each processor's diagonal portion
1792    to be its local rows and the corresponding columns (a square submatrix);
1793    each processor's off-diagonal portion encompasses the remainder of the
1794    local matrix (a rectangular submatrix).
1795 
1796    The user can specify preallocated storage for the diagonal part of
1797    the local submatrix with either d_nz or d_nnz (not both).  Set
1798    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1799    memory allocation.  Likewise, specify preallocated storage for the
1800    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1801 
1802    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1803    the figure below we depict these three local rows and all columns (0-11).
1804 
1805 .vb
1806            0 1 2 3 4 5 6 7 8 9 10 11
1807           -------------------
1808    row 3  |  o o o d d d o o o o o o
1809    row 4  |  o o o d d d o o o o o o
1810    row 5  |  o o o d d d o o o o o o
1811           -------------------
1812 .ve
1813 
1814    Thus, any entries in the d locations are stored in the d (diagonal)
1815    submatrix, and any entries in the o locations are stored in the
1816    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1817    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1818 
1819    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1820    plus the diagonal part of the d matrix,
1821    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1822    In general, for PDE problems in which most nonzeros are near the diagonal,
1823    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1824    or you will get TERRIBLE performance; see the users' manual chapter on
1825    matrices.
1826 
1827    Level: intermediate
1828 
1829 .keywords: matrix, block, aij, compressed row, sparse, parallel
1830 
1831 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1832 @*/
1833 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1834 {
1835   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1836 
1837   PetscFunctionBegin;
1838   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1839   if (f) {
1840     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1841   }
1842   PetscFunctionReturn(0);
1843 }
1844 
1845 #undef __FUNCT__
1846 #define __FUNCT__ "MatCreateMPISBAIJ"
1847 /*@C
1848    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1849    (block compressed row).  For good matrix assembly performance
1850    the user should preallocate the matrix storage by setting the parameters
1851    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1852    performance can be increased by more than a factor of 50.
1853 
1854    Collective on MPI_Comm
1855 
1856    Input Parameters:
1857 +  comm - MPI communicator
1858 .  bs   - size of blockk
1859 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1860            This value should be the same as the local size used in creating the
1861            y vector for the matrix-vector product y = Ax.
1862 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1863            This value should be the same as the local size used in creating the
1864            x vector for the matrix-vector product y = Ax.
1865 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1866 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1867 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1868            submatrix  (same for all local rows)
1869 .  d_nnz - array containing the number of block nonzeros in the various block rows
1870            in the upper triangular portion of the in diagonal portion of the local
1871            (possibly different for each block block row) or PETSC_NULL.
1872            You must leave room for the diagonal entry even if it is zero.
1873 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1874            submatrix (same for all local rows).
1875 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1876            off-diagonal portion of the local submatrix (possibly different for
1877            each block row) or PETSC_NULL.
1878 
1879    Output Parameter:
1880 .  A - the matrix
1881 
1882    Options Database Keys:
1883 .   -mat_no_unroll - uses code that does not unroll the loops in the
1884                      block calculations (much slower)
1885 .   -mat_block_size - size of the blocks to use
1886 .   -mat_mpi - use the parallel matrix data structures even on one processor
1887                (defaults to using SeqBAIJ format on one processor)
1888 
1889    Notes:
1890    The user MUST specify either the local or global matrix dimensions
1891    (possibly both).
1892 
1893    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1894    than it must be used on all processors that share the object for that argument.
1895 
1896    If the *_nnz parameter is given then the *_nz parameter is ignored
1897 
1898    Storage Information:
1899    For a square global matrix we define each processor's diagonal portion
1900    to be its local rows and the corresponding columns (a square submatrix);
1901    each processor's off-diagonal portion encompasses the remainder of the
1902    local matrix (a rectangular submatrix).
1903 
1904    The user can specify preallocated storage for the diagonal part of
1905    the local submatrix with either d_nz or d_nnz (not both).  Set
1906    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1907    memory allocation.  Likewise, specify preallocated storage for the
1908    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1909 
1910    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1911    the figure below we depict these three local rows and all columns (0-11).
1912 
1913 .vb
1914            0 1 2 3 4 5 6 7 8 9 10 11
1915           -------------------
1916    row 3  |  o o o d d d o o o o o o
1917    row 4  |  o o o d d d o o o o o o
1918    row 5  |  o o o d d d o o o o o o
1919           -------------------
1920 .ve
1921 
1922    Thus, any entries in the d locations are stored in the d (diagonal)
1923    submatrix, and any entries in the o locations are stored in the
1924    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1925    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1926 
1927    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1928    plus the diagonal part of the d matrix,
1929    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1930    In general, for PDE problems in which most nonzeros are near the diagonal,
1931    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1932    or you will get TERRIBLE performance; see the users' manual chapter on
1933    matrices.
1934 
1935    Level: intermediate
1936 
1937 .keywords: matrix, block, aij, compressed row, sparse, parallel
1938 
1939 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1940 @*/
1941 
1942 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1943 {
1944   PetscErrorCode ierr;
1945   PetscMPIInt    size;
1946 
1947   PetscFunctionBegin;
1948   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1949   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1950   if (size > 1) {
1951     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1952     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1953   } else {
1954     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1955     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1956   }
1957   PetscFunctionReturn(0);
1958 }
1959 
1960 
1961 #undef __FUNCT__
1962 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1963 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1964 {
1965   Mat            mat;
1966   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1967   PetscErrorCode ierr;
1968   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1969   PetscScalar    *array;
1970 
1971   PetscFunctionBegin;
1972   *newmat       = 0;
1973   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1974   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1975   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1976 
1977   mat->factor       = matin->factor;
1978   mat->preallocated = PETSC_TRUE;
1979   mat->assembled    = PETSC_TRUE;
1980   mat->insertmode   = NOT_SET_VALUES;
1981 
1982   a = (Mat_MPISBAIJ*)mat->data;
1983   mat->bs  = matin->bs;
1984   a->bs2   = oldmat->bs2;
1985   a->mbs   = oldmat->mbs;
1986   a->nbs   = oldmat->nbs;
1987   a->Mbs   = oldmat->Mbs;
1988   a->Nbs   = oldmat->Nbs;
1989 
1990   a->rstart       = oldmat->rstart;
1991   a->rend         = oldmat->rend;
1992   a->cstart       = oldmat->cstart;
1993   a->cend         = oldmat->cend;
1994   a->size         = oldmat->size;
1995   a->rank         = oldmat->rank;
1996   a->donotstash   = oldmat->donotstash;
1997   a->roworiented  = oldmat->roworiented;
1998   a->rowindices   = 0;
1999   a->rowvalues    = 0;
2000   a->getrowactive = PETSC_FALSE;
2001   a->barray       = 0;
2002   a->rstart_bs    = oldmat->rstart_bs;
2003   a->rend_bs      = oldmat->rend_bs;
2004   a->cstart_bs    = oldmat->cstart_bs;
2005   a->cend_bs      = oldmat->cend_bs;
2006 
2007   /* hash table stuff */
2008   a->ht           = 0;
2009   a->hd           = 0;
2010   a->ht_size      = 0;
2011   a->ht_flag      = oldmat->ht_flag;
2012   a->ht_fact      = oldmat->ht_fact;
2013   a->ht_total_ct  = 0;
2014   a->ht_insert_ct = 0;
2015 
2016   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2017   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2018   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2019   if (oldmat->colmap) {
2020 #if defined (PETSC_USE_CTABLE)
2021     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2022 #else
2023     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2024     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2025     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2026 #endif
2027   } else a->colmap = 0;
2028 
2029   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2030     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2031     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2032     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2033   } else a->garray = 0;
2034 
2035   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2036   PetscLogObjectParent(mat,a->lvec);
2037   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2038   PetscLogObjectParent(mat,a->Mvctx);
2039 
2040   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2041   PetscLogObjectParent(mat,a->slvec0);
2042   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2043   PetscLogObjectParent(mat,a->slvec1);
2044 
2045   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2046   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2047   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2048   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2049   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2050   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2051   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2052   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2053   PetscLogObjectParent(mat,a->slvec0);
2054   PetscLogObjectParent(mat,a->slvec1);
2055   PetscLogObjectParent(mat,a->slvec0b);
2056   PetscLogObjectParent(mat,a->slvec1a);
2057   PetscLogObjectParent(mat,a->slvec1b);
2058 
2059   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2060   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2061   a->sMvctx = oldmat->sMvctx;
2062   PetscLogObjectParent(mat,a->sMvctx);
2063 
2064   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2065   PetscLogObjectParent(mat,a->A);
2066   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2067   PetscLogObjectParent(mat,a->B);
2068   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2069   *newmat = mat;
2070   PetscFunctionReturn(0);
2071 }
2072 
2073 #include "petscsys.h"
2074 
2075 #undef __FUNCT__
2076 #define __FUNCT__ "MatLoad_MPISBAIJ"
2077 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2078 {
2079   Mat            A;
2080   PetscErrorCode ierr;
2081   PetscInt       i,nz,j,rstart,rend;
2082   PetscScalar    *vals,*buf;
2083   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2084   MPI_Status     status;
2085   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2086   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2087   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2088   PetscInt       bs=1,Mbs,mbs,extra_rows;
2089   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2090   PetscInt       dcount,kmax,k,nzcount,tmp;
2091   int            fd;
2092 
2093   PetscFunctionBegin;
2094   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2095 
2096   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2097   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2098   if (!rank) {
2099     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2100     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2101     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2102     if (header[3] < 0) {
2103       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2104     }
2105   }
2106 
2107   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2108   M = header[1]; N = header[2];
2109 
2110   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2111 
2112   /*
2113      This code adds extra rows to make sure the number of rows is
2114      divisible by the blocksize
2115   */
2116   Mbs        = M/bs;
2117   extra_rows = bs - M + bs*(Mbs);
2118   if (extra_rows == bs) extra_rows = 0;
2119   else                  Mbs++;
2120   if (extra_rows &&!rank) {
2121     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2122   }
2123 
2124   /* determine ownership of all rows */
2125   mbs        = Mbs/size + ((Mbs % size) > rank);
2126   m          = mbs*bs;
2127   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2128   browners   = rowners + size + 1;
2129   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2130   rowners[0] = 0;
2131   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2132   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2133   rstart = rowners[rank];
2134   rend   = rowners[rank+1];
2135 
2136   /* distribute row lengths to all processors */
2137   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2138   if (!rank) {
2139     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2140     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2141     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2142     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2143     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2144     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2145     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2146   } else {
2147     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2148   }
2149 
2150   if (!rank) {   /* procs[0] */
2151     /* calculate the number of nonzeros on each processor */
2152     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2153     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2154     for (i=0; i<size; i++) {
2155       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2156         procsnz[i] += rowlengths[j];
2157       }
2158     }
2159     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2160 
2161     /* determine max buffer needed and allocate it */
2162     maxnz = 0;
2163     for (i=0; i<size; i++) {
2164       maxnz = PetscMax(maxnz,procsnz[i]);
2165     }
2166     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2167 
2168     /* read in my part of the matrix column indices  */
2169     nz     = procsnz[0];
2170     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2171     mycols = ibuf;
2172     if (size == 1)  nz -= extra_rows;
2173     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2174     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2175 
2176     /* read in every ones (except the last) and ship off */
2177     for (i=1; i<size-1; i++) {
2178       nz   = procsnz[i];
2179       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2180       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2181     }
2182     /* read in the stuff for the last proc */
2183     if (size != 1) {
2184       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2185       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2186       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2187       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2188     }
2189     ierr = PetscFree(cols);CHKERRQ(ierr);
2190   } else {  /* procs[i], i>0 */
2191     /* determine buffer space needed for message */
2192     nz = 0;
2193     for (i=0; i<m; i++) {
2194       nz += locrowlens[i];
2195     }
2196     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2197     mycols = ibuf;
2198     /* receive message of column indices*/
2199     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2200     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2201     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2202   }
2203 
2204   /* loop over local rows, determining number of off diagonal entries */
2205   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2206   odlens   = dlens + (rend-rstart);
2207   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2208   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2209   masked1  = mask    + Mbs;
2210   masked2  = masked1 + Mbs;
2211   rowcount = 0; nzcount = 0;
2212   for (i=0; i<mbs; i++) {
2213     dcount  = 0;
2214     odcount = 0;
2215     for (j=0; j<bs; j++) {
2216       kmax = locrowlens[rowcount];
2217       for (k=0; k<kmax; k++) {
2218         tmp = mycols[nzcount++]/bs; /* block col. index */
2219         if (!mask[tmp]) {
2220           mask[tmp] = 1;
2221           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2222           else masked1[dcount++] = tmp; /* entry in diag portion */
2223         }
2224       }
2225       rowcount++;
2226     }
2227 
2228     dlens[i]  = dcount;  /* d_nzz[i] */
2229     odlens[i] = odcount; /* o_nzz[i] */
2230 
2231     /* zero out the mask elements we set */
2232     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2233     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2234   }
2235 
2236   /* create our matrix */
2237   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2238   ierr = MatSetType(A,type);CHKERRQ(ierr);
2239   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2240   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2241 
2242   if (!rank) {
2243     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2244     /* read in my part of the matrix numerical values  */
2245     nz = procsnz[0];
2246     vals = buf;
2247     mycols = ibuf;
2248     if (size == 1)  nz -= extra_rows;
2249     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2250     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2251 
2252     /* insert into matrix */
2253     jj      = rstart*bs;
2254     for (i=0; i<m; i++) {
2255       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2256       mycols += locrowlens[i];
2257       vals   += locrowlens[i];
2258       jj++;
2259     }
2260 
2261     /* read in other processors (except the last one) and ship out */
2262     for (i=1; i<size-1; i++) {
2263       nz   = procsnz[i];
2264       vals = buf;
2265       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2266       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2267     }
2268     /* the last proc */
2269     if (size != 1){
2270       nz   = procsnz[i] - extra_rows;
2271       vals = buf;
2272       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2273       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2274       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2275     }
2276     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2277 
2278   } else {
2279     /* receive numeric values */
2280     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2281 
2282     /* receive message of values*/
2283     vals   = buf;
2284     mycols = ibuf;
2285     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2286     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2287     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2288 
2289     /* insert into matrix */
2290     jj      = rstart*bs;
2291     for (i=0; i<m; i++) {
2292       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2293       mycols += locrowlens[i];
2294       vals   += locrowlens[i];
2295       jj++;
2296     }
2297   }
2298 
2299   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2300   ierr = PetscFree(buf);CHKERRQ(ierr);
2301   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2302   ierr = PetscFree(rowners);CHKERRQ(ierr);
2303   ierr = PetscFree(dlens);CHKERRQ(ierr);
2304   ierr = PetscFree(mask);CHKERRQ(ierr);
2305   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2306   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2307   *newmat = A;
2308   PetscFunctionReturn(0);
2309 }
2310 
2311 #undef __FUNCT__
2312 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2313 /*XXXXX@
2314    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2315 
2316    Input Parameters:
2317 .  mat  - the matrix
2318 .  fact - factor
2319 
2320    Collective on Mat
2321 
2322    Level: advanced
2323 
2324   Notes:
2325    This can also be set by the command line option: -mat_use_hash_table fact
2326 
2327 .keywords: matrix, hashtable, factor, HT
2328 
2329 .seealso: MatSetOption()
2330 @XXXXX*/
2331 
2332 
2333 #undef __FUNCT__
2334 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2335 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2336 {
2337   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2338   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2339   PetscReal      atmp;
2340   PetscReal      *work,*svalues,*rvalues;
2341   PetscErrorCode ierr;
2342   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2343   PetscMPIInt    rank,size;
2344   PetscInt       *rowners_bs,dest,count,source;
2345   PetscScalar    *va;
2346   MatScalar      *ba;
2347   MPI_Status     stat;
2348 
2349   PetscFunctionBegin;
2350   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2351   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2352 
2353   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2354   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2355 
2356   bs   = A->bs;
2357   mbs  = a->mbs;
2358   Mbs  = a->Mbs;
2359   ba   = b->a;
2360   bi   = b->i;
2361   bj   = b->j;
2362 
2363   /* find ownerships */
2364   rowners_bs = a->rowners_bs;
2365 
2366   /* each proc creates an array to be distributed */
2367   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2368   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2369 
2370   /* row_max for B */
2371   if (rank != size-1){
2372     for (i=0; i<mbs; i++) {
2373       ncols = bi[1] - bi[0]; bi++;
2374       brow  = bs*i;
2375       for (j=0; j<ncols; j++){
2376         bcol = bs*(*bj);
2377         for (kcol=0; kcol<bs; kcol++){
2378           col = bcol + kcol;                 /* local col index */
2379           col += rowners_bs[rank+1];      /* global col index */
2380           for (krow=0; krow<bs; krow++){
2381             atmp = PetscAbsScalar(*ba); ba++;
2382             row = brow + krow;    /* local row index */
2383             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2384             if (work[col] < atmp) work[col] = atmp;
2385           }
2386         }
2387         bj++;
2388       }
2389     }
2390 
2391     /* send values to its owners */
2392     for (dest=rank+1; dest<size; dest++){
2393       svalues = work + rowners_bs[dest];
2394       count   = rowners_bs[dest+1]-rowners_bs[dest];
2395       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2396     }
2397   }
2398 
2399   /* receive values */
2400   if (rank){
2401     rvalues = work;
2402     count   = rowners_bs[rank+1]-rowners_bs[rank];
2403     for (source=0; source<rank; source++){
2404       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2405       /* process values */
2406       for (i=0; i<count; i++){
2407         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2408       }
2409     }
2410   }
2411 
2412   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2413   ierr = PetscFree(work);CHKERRQ(ierr);
2414   PetscFunctionReturn(0);
2415 }
2416 
2417 #undef __FUNCT__
2418 #define __FUNCT__ "MatRelax_MPISBAIJ"
2419 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2420 {
2421   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2422   PetscErrorCode ierr;
2423   PetscInt       mbs=mat->mbs,bs=matin->bs;
2424   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2425   Vec            bb1;
2426 
2427   PetscFunctionBegin;
2428   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2429   if (bs > 1)
2430     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2431 
2432   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2433     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2434       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2435       its--;
2436     }
2437 
2438     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2439     while (its--){
2440 
2441       /* lower triangular part: slvec0b = - B^T*xx */
2442       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2443 
2444       /* copy xx into slvec0a */
2445       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2446       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2447       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2448       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2449 
2450       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2451 
2452       /* copy bb into slvec1a */
2453       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2454       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2455       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2456       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2457 
2458       /* set slvec1b = 0 */
2459       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2460 
2461       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2462       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2463       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2464       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2465 
2466       /* upper triangular part: bb1 = bb1 - B*x */
2467       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2468 
2469       /* local diagonal sweep */
2470       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2471     }
2472     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2473   } else {
2474     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2475   }
2476   PetscFunctionReturn(0);
2477 }
2478 
2479 #undef __FUNCT__
2480 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2481 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2482 {
2483   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2484   PetscErrorCode ierr;
2485   PetscScalar    mone=-1.0;
2486   Vec            lvec1,bb1;
2487 
2488   PetscFunctionBegin;
2489   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2490   if (matin->bs > 1)
2491     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2492 
2493   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2494     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2495       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2496       its--;
2497     }
2498 
2499     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2500     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2501     while (its--){
2502       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2503 
2504       /* lower diagonal part: bb1 = bb - B^T*xx */
2505       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2506       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2507 
2508       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2509       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2510       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2511 
2512       /* upper diagonal part: bb1 = bb1 - B*x */
2513       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2514       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2515 
2516       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2517 
2518       /* diagonal sweep */
2519       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2520     }
2521     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2522     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2523   } else {
2524     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2525   }
2526   PetscFunctionReturn(0);
2527 }
2528 
2529