xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision a96a251defc07734f733d1abf27b5765e87fdc8b)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,a->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\
112  \
113         /* copy over old data into new slots */ \
114         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
115         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
116         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
117         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
118         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
119         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
120         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
122         /* free up old matrix storage */ \
123        if (!a->singlemalloc) {\
124           ierr = PetscFree(a->a);CHKERRQ(ierr);\
125           ierr = PetscFree(a->i);CHKERRQ(ierr);\
126           ierr = PetscFree(a->j);CHKERRQ(ierr);\
127         } else {\
128           ierr = PetscFree3(a->a,a->i,a->j);CHKERRQ(ierr);\
129         }\
130         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
131         a->singlemalloc = PETSC_TRUE; \
132  \
133         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
134         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
135         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
136         a->maxnz += bs2*CHUNKSIZE; \
137         a->reallocs++; \
138         a->nz++; \
139       } \
140       N = nrow++ - 1;  \
141       /* shift up all the later entries in this row */ \
142       for (ii=N; ii>=_i; ii--) { \
143         rp[ii+1] = rp[ii]; \
144         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
145       } \
146       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
147       rp[_i]                      = bcol;  \
148       ap[bs2*_i + bs*cidx + ridx] = value;  \
149       a_noinsert:; \
150     ailen[brow] = nrow; \
151 }
152 #ifndef MatSetValues_SeqBAIJ_B_Private
153 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
154 { \
155     brow = row/bs;  \
156     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
157     rmax = bimax[brow]; nrow = bilen[brow]; \
158       bcol = col/bs; \
159       ridx = row % bs; cidx = col % bs; \
160       low = 0; high = nrow; \
161       while (high-low > 3) { \
162         t = (low+high)/2; \
163         if (rp[t] > bcol) high = t; \
164         else              low  = t; \
165       } \
166       for (_i=low; _i<high; _i++) { \
167         if (rp[_i] > bcol) break; \
168         if (rp[_i] == bcol) { \
169           bap  = ap +  bs2*_i + bs*cidx + ridx; \
170           if (addv == ADD_VALUES) *bap += value;  \
171           else                    *bap  = value;  \
172           goto b_noinsert; \
173         } \
174       } \
175       if (b->nonew == 1) goto b_noinsert; \
176       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
177       if (nrow >= rmax) { \
178         /* there is no extra room in row, therefore enlarge */ \
179         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
180         MatScalar *new_a; \
181  \
182         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
183  \
184         /* malloc new storage space */ \
185         ierr = PetscMalloc3(bs2*new_nz,PetscScalar,&new_a,new_nz,PetscInt,&new_j,b->mbs+1,PetscInt,&new_i);CHKERRQ(ierr);\
186  \
187         /* copy over old data into new slots */ \
188         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
189         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
190         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
191         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
192         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
193         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
194         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
195         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE),ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
196         /* free up old matrix storage */ \
197        if (!b->singlemalloc) {\
198           ierr = PetscFree(b->a);CHKERRQ(ierr);\
199           ierr = PetscFree(b->i);CHKERRQ(ierr);\
200           ierr = PetscFree(b->j);CHKERRQ(ierr);\
201         } else {\
202           ierr = PetscFree3(b->a,b->i,b->j);CHKERRQ(ierr);\
203         }\
204         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
205         b->singlemalloc = PETSC_TRUE; \
206  \
207         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
208         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
209         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
210         b->maxnz += bs2*CHUNKSIZE; \
211         b->reallocs++; \
212         b->nz++; \
213       } \
214       N = nrow++ - 1;  \
215       /* shift up all the later entries in this row */ \
216       for (ii=N; ii>=_i; ii--) { \
217         rp[ii+1] = rp[ii]; \
218         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
219       } \
220       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
221       rp[_i]                      = bcol;  \
222       ap[bs2*_i + bs*cidx + ridx] = value;  \
223       b_noinsert:; \
224     bilen[brow] = nrow; \
225 }
226 #endif
227 
228 #if defined(PETSC_USE_MAT_SINGLE)
229 #undef __FUNCT__
230 #define __FUNCT__ "MatSetValues_MPISBAIJ"
231 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
232 {
233   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
234   PetscErrorCode ierr;
235   PetscInt       i,N = m*n;
236   MatScalar      *vsingle;
237 
238   PetscFunctionBegin;
239   if (N > b->setvalueslen) {
240     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
241     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
242     b->setvalueslen  = N;
243   }
244   vsingle = b->setvaluescopy;
245 
246   for (i=0; i<N; i++) {
247     vsingle[i] = v[i];
248   }
249   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
250   PetscFunctionReturn(0);
251 }
252 
253 #undef __FUNCT__
254 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
255 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
256 {
257   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
258   PetscErrorCode ierr;
259   PetscInt       i,N = m*n*b->bs2;
260   MatScalar      *vsingle;
261 
262   PetscFunctionBegin;
263   if (N > b->setvalueslen) {
264     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
265     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
266     b->setvalueslen  = N;
267   }
268   vsingle = b->setvaluescopy;
269   for (i=0; i<N; i++) {
270     vsingle[i] = v[i];
271   }
272   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
273   PetscFunctionReturn(0);
274 }
275 #endif
276 
277 /* Only add/insert a(i,j) with i<=j (blocks).
278    Any a(i,j) with i>j input by user is ingored.
279 */
280 #undef __FUNCT__
281 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
282 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
283 {
284   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
285   MatScalar      value;
286   PetscTruth     roworiented = baij->roworiented;
287   PetscErrorCode ierr;
288   PetscInt       i,j,row,col;
289   PetscInt       rstart_orig=baij->rstart_bs;
290   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
291   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
292 
293   /* Some Variables required in the macro */
294   Mat            A = baij->A;
295   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
296   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
297   MatScalar      *aa=a->a;
298 
299   Mat            B = baij->B;
300   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
301   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
302   MatScalar     *ba=b->a;
303 
304   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
305   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
306   MatScalar     *ap,*bap;
307 
308   /* for stash */
309   PetscInt      n_loc, *in_loc=0;
310   MatScalar     *v_loc=0;
311 
312   PetscFunctionBegin;
313 
314   if(!baij->donotstash){
315     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
316     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
317   }
318 
319   for (i=0; i<m; i++) {
320     if (im[i] < 0) continue;
321 #if defined(PETSC_USE_DEBUG)
322     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
323 #endif
324     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
325       row = im[i] - rstart_orig;              /* local row index */
326       for (j=0; j<n; j++) {
327         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
328         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
329           col = in[j] - cstart_orig;          /* local col index */
330           brow = row/bs; bcol = col/bs;
331           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
332           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
333           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
334           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
335         } else if (in[j] < 0) continue;
336 #if defined(PETSC_USE_DEBUG)
337         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
338 #endif
339         else {  /* off-diag entry (B) */
340           if (mat->was_assembled) {
341             if (!baij->colmap) {
342               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
343             }
344 #if defined (PETSC_USE_CTABLE)
345             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
346             col  = col - 1;
347 #else
348             col = baij->colmap[in[j]/bs] - 1;
349 #endif
350             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
351               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
352               col =  in[j];
353               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
354               B = baij->B;
355               b = (Mat_SeqBAIJ*)(B)->data;
356               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
357               ba=b->a;
358             } else col += in[j]%bs;
359           } else col = in[j];
360           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
361           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
362           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
363         }
364       }
365     } else {  /* off processor entry */
366       if (!baij->donotstash) {
367         n_loc = 0;
368         for (j=0; j<n; j++){
369           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
370           in_loc[n_loc] = in[j];
371           if (roworiented) {
372             v_loc[n_loc] = v[i*n+j];
373           } else {
374             v_loc[n_loc] = v[j*m+i];
375           }
376           n_loc++;
377         }
378         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
379       }
380     }
381   }
382 
383   if(!baij->donotstash){
384     ierr = PetscFree(in_loc);CHKERRQ(ierr);
385     ierr = PetscFree(v_loc);CHKERRQ(ierr);
386   }
387   PetscFunctionReturn(0);
388 }
389 
390 #undef __FUNCT__
391 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
392 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
393 {
394   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
395   const MatScalar *value;
396   MatScalar       *barray=baij->barray;
397   PetscTruth      roworiented = baij->roworiented;
398   PetscErrorCode  ierr;
399   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
400   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
401   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
402 
403   PetscFunctionBegin;
404   if(!barray) {
405     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
406     baij->barray = barray;
407   }
408 
409   if (roworiented) {
410     stepval = (n-1)*bs;
411   } else {
412     stepval = (m-1)*bs;
413   }
414   for (i=0; i<m; i++) {
415     if (im[i] < 0) continue;
416 #if defined(PETSC_USE_DEBUG)
417     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
418 #endif
419     if (im[i] >= rstart && im[i] < rend) {
420       row = im[i] - rstart;
421       for (j=0; j<n; j++) {
422         /* If NumCol = 1 then a copy is not required */
423         if ((roworiented) && (n == 1)) {
424           barray = (MatScalar*) v + i*bs2;
425         } else if((!roworiented) && (m == 1)) {
426           barray = (MatScalar*) v + j*bs2;
427         } else { /* Here a copy is required */
428           if (roworiented) {
429             value = v + i*(stepval+bs)*bs + j*bs;
430           } else {
431             value = v + j*(stepval+bs)*bs + i*bs;
432           }
433           for (ii=0; ii<bs; ii++,value+=stepval) {
434             for (jj=0; jj<bs; jj++) {
435               *barray++  = *value++;
436             }
437           }
438           barray -=bs2;
439         }
440 
441         if (in[j] >= cstart && in[j] < cend){
442           col  = in[j] - cstart;
443           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
444         }
445         else if (in[j] < 0) continue;
446 #if defined(PETSC_USE_DEBUG)
447         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
448 #endif
449         else {
450           if (mat->was_assembled) {
451             if (!baij->colmap) {
452               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
453             }
454 
455 #if defined(PETSC_USE_DEBUG)
456 #if defined (PETSC_USE_CTABLE)
457             { PetscInt data;
458               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
459               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
460             }
461 #else
462             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463 #endif
464 #endif
465 #if defined (PETSC_USE_CTABLE)
466 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
467             col  = (col - 1)/bs;
468 #else
469             col = (baij->colmap[in[j]] - 1)/bs;
470 #endif
471             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
472               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
473               col =  in[j];
474             }
475           }
476           else col = in[j];
477           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
478         }
479       }
480     } else {
481       if (!baij->donotstash) {
482         if (roworiented) {
483           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
484         } else {
485           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
486         }
487       }
488     }
489   }
490   PetscFunctionReturn(0);
491 }
492 
493 #undef __FUNCT__
494 #define __FUNCT__ "MatGetValues_MPISBAIJ"
495 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
496 {
497   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
498   PetscErrorCode ierr;
499   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
500   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
501 
502   PetscFunctionBegin;
503   for (i=0; i<m; i++) {
504     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
505     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
506     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
507       row = idxm[i] - bsrstart;
508       for (j=0; j<n; j++) {
509         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
510         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
511         if (idxn[j] >= bscstart && idxn[j] < bscend){
512           col = idxn[j] - bscstart;
513           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
514         } else {
515           if (!baij->colmap) {
516             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
517           }
518 #if defined (PETSC_USE_CTABLE)
519           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
520           data --;
521 #else
522           data = baij->colmap[idxn[j]/bs]-1;
523 #endif
524           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
525           else {
526             col  = data + idxn[j]%bs;
527             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
528           }
529         }
530       }
531     } else {
532       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
533     }
534   }
535  PetscFunctionReturn(0);
536 }
537 
538 #undef __FUNCT__
539 #define __FUNCT__ "MatNorm_MPISBAIJ"
540 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
541 {
542   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
543   PetscErrorCode ierr;
544   PetscReal      sum[2],*lnorm2;
545 
546   PetscFunctionBegin;
547   if (baij->size == 1) {
548     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
549   } else {
550     if (type == NORM_FROBENIUS) {
551       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
552       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
553       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
554       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
555       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
556       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
557       *norm = sqrt(sum[0] + 2*sum[1]);
558       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
559     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
560       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
561       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
562       PetscReal    *rsum,*rsum2,vabs;
563       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
564       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
565       MatScalar    *v;
566 
567       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
568       rsum2 = rsum + mat->N;
569       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
570       /* Amat */
571       v = amat->a; jj = amat->j;
572       for (brow=0; brow<mbs; brow++) {
573         grow = bs*(rstart + brow);
574         nz = amat->i[brow+1] - amat->i[brow];
575         for (bcol=0; bcol<nz; bcol++){
576           gcol = bs*(rstart + *jj); jj++;
577           for (col=0; col<bs; col++){
578             for (row=0; row<bs; row++){
579               vabs = PetscAbsScalar(*v); v++;
580               rsum[gcol+col] += vabs;
581               /* non-diagonal block */
582               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
583             }
584           }
585         }
586       }
587       /* Bmat */
588       v = bmat->a; jj = bmat->j;
589       for (brow=0; brow<mbs; brow++) {
590         grow = bs*(rstart + brow);
591         nz = bmat->i[brow+1] - bmat->i[brow];
592         for (bcol=0; bcol<nz; bcol++){
593           gcol = bs*garray[*jj]; jj++;
594           for (col=0; col<bs; col++){
595             for (row=0; row<bs; row++){
596               vabs = PetscAbsScalar(*v); v++;
597               rsum[gcol+col] += vabs;
598               rsum[grow+row] += vabs;
599             }
600           }
601         }
602       }
603       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
604       *norm = 0.0;
605       for (col=0; col<mat->N; col++) {
606         if (rsum2[col] > *norm) *norm = rsum2[col];
607       }
608       ierr = PetscFree(rsum);CHKERRQ(ierr);
609     } else {
610       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
611     }
612   }
613   PetscFunctionReturn(0);
614 }
615 
616 #undef __FUNCT__
617 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
618 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
619 {
620   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
621   PetscErrorCode ierr;
622   PetscInt       nstash,reallocs;
623   InsertMode     addv;
624 
625   PetscFunctionBegin;
626   if (baij->donotstash) {
627     PetscFunctionReturn(0);
628   }
629 
630   /* make sure all processors are either in INSERTMODE or ADDMODE */
631   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
632   if (addv == (ADD_VALUES|INSERT_VALUES)) {
633     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
634   }
635   mat->insertmode = addv; /* in case this processor had no cache */
636 
637   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
638   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
639   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
640   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
641   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
642   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
643   PetscFunctionReturn(0);
644 }
645 
646 #undef __FUNCT__
647 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
648 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
649 {
650   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
651   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
652   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
653   PetscErrorCode ierr;
654   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
655   PetscInt       *row,*col,other_disassembled;
656   PetscMPIInt    n;
657   PetscTruth     r1,r2,r3;
658   MatScalar      *val;
659   InsertMode     addv = mat->insertmode;
660 
661   PetscFunctionBegin;
662 
663   if (!baij->donotstash) {
664     while (1) {
665       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
666       if (!flg) break;
667 
668       for (i=0; i<n;) {
669         /* Now identify the consecutive vals belonging to the same row */
670         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
671         if (j < n) ncols = j-i;
672         else       ncols = n-i;
673         /* Now assemble all these values with a single function call */
674         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
675         i = j;
676       }
677     }
678     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
679     /* Now process the block-stash. Since the values are stashed column-oriented,
680        set the roworiented flag to column oriented, and after MatSetValues()
681        restore the original flags */
682     r1 = baij->roworiented;
683     r2 = a->roworiented;
684     r3 = b->roworiented;
685     baij->roworiented = PETSC_FALSE;
686     a->roworiented    = PETSC_FALSE;
687     b->roworiented    = PETSC_FALSE;
688     while (1) {
689       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
690       if (!flg) break;
691 
692       for (i=0; i<n;) {
693         /* Now identify the consecutive vals belonging to the same row */
694         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
695         if (j < n) ncols = j-i;
696         else       ncols = n-i;
697         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
702     baij->roworiented = r1;
703     a->roworiented    = r2;
704     b->roworiented    = r3;
705   }
706 
707   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
708   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
709 
710   /* determine if any processor has disassembled, if so we must
711      also disassemble ourselfs, in order that we may reassemble. */
712   /*
713      if nonzero structure of submatrix B cannot change then we know that
714      no processor disassembled thus we can skip this stuff
715   */
716   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
717     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
718     if (mat->was_assembled && !other_disassembled) {
719       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
720     }
721   }
722 
723   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
724     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
725   }
726   b->compressedrow.use = PETSC_TRUE;
727   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
728   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
729 
730   if (baij->rowvalues) {
731     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
732     baij->rowvalues = 0;
733   }
734 
735   PetscFunctionReturn(0);
736 }
737 
738 #undef __FUNCT__
739 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
740 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
741 {
742   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
743   PetscErrorCode    ierr;
744   PetscInt          bs = mat->bs;
745   PetscMPIInt       size = baij->size,rank = baij->rank;
746   PetscTruth        iascii,isdraw;
747   PetscViewer       sviewer;
748   PetscViewerFormat format;
749 
750   PetscFunctionBegin;
751   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
752   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
753   if (iascii) {
754     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
755     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
756       MatInfo info;
757       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
758       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
759       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
760               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
761               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
762       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
763       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
764       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
765       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
766       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
767       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
768       PetscFunctionReturn(0);
769     } else if (format == PETSC_VIEWER_ASCII_INFO) {
770       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
771       PetscFunctionReturn(0);
772     }
773   }
774 
775   if (isdraw) {
776     PetscDraw       draw;
777     PetscTruth isnull;
778     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
779     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
780   }
781 
782   if (size == 1) {
783     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
784     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
785   } else {
786     /* assemble the entire matrix onto first processor. */
787     Mat         A;
788     Mat_SeqSBAIJ *Aloc;
789     Mat_SeqBAIJ *Bloc;
790     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
791     MatScalar   *a;
792 
793     /* Should this be the same type as mat? */
794     if (!rank) {
795       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
796     } else {
797       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
798     }
799     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
800     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
801     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
802 
803     /* copy over the A part */
804     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
805     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
806     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
807 
808     for (i=0; i<mbs; i++) {
809       rvals[0] = bs*(baij->rstart + i);
810       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
811       for (j=ai[i]; j<ai[i+1]; j++) {
812         col = (baij->cstart+aj[j])*bs;
813         for (k=0; k<bs; k++) {
814           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
815           col++; a += bs;
816         }
817       }
818     }
819     /* copy over the B part */
820     Bloc = (Mat_SeqBAIJ*)baij->B->data;
821     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
822     for (i=0; i<mbs; i++) {
823       rvals[0] = bs*(baij->rstart + i);
824       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
825       for (j=ai[i]; j<ai[i+1]; j++) {
826         col = baij->garray[aj[j]]*bs;
827         for (k=0; k<bs; k++) {
828           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
829           col++; a += bs;
830         }
831       }
832     }
833     ierr = PetscFree(rvals);CHKERRQ(ierr);
834     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
835     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
836     /*
837        Everyone has to call to draw the matrix since the graphics waits are
838        synchronized across all processors that share the PetscDraw object
839     */
840     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
841     if (!rank) {
842       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
843       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
844     }
845     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
846     ierr = MatDestroy(A);CHKERRQ(ierr);
847   }
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatView_MPISBAIJ"
853 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
854 {
855   PetscErrorCode ierr;
856   PetscTruth     iascii,isdraw,issocket,isbinary;
857 
858   PetscFunctionBegin;
859   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
860   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
861   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
862   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
863   if (iascii || isdraw || issocket || isbinary) {
864     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
865   } else {
866     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
867   }
868   PetscFunctionReturn(0);
869 }
870 
871 #undef __FUNCT__
872 #define __FUNCT__ "MatDestroy_MPISBAIJ"
873 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
874 {
875   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
876   PetscErrorCode ierr;
877 
878   PetscFunctionBegin;
879 #if defined(PETSC_USE_LOG)
880   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
881 #endif
882   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
883   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
884   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
885   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
886   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
887 #if defined (PETSC_USE_CTABLE)
888   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
889 #else
890   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
891 #endif
892   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
893   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
894   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
895   if (baij->slvec0) {
896     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
897     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
898   }
899   if (baij->slvec1) {
900     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
901     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
902     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
903   }
904   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
905   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
906   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
907   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
908 #if defined(PETSC_USE_MAT_SINGLE)
909   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
910 #endif
911   ierr = PetscFree(baij);CHKERRQ(ierr);
912 
913   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
914   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
915   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
917   PetscFunctionReturn(0);
918 }
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "MatMult_MPISBAIJ"
922 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
923 {
924   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
925   PetscErrorCode ierr;
926   PetscInt       nt,mbs=a->mbs,bs=A->bs;
927   PetscScalar    *x,*from,zero=0.0;
928 
929   PetscFunctionBegin;
930   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
931   if (nt != A->n) {
932     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
933   }
934   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
935   if (nt != A->m) {
936     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
937   }
938 
939   /* diagonal part */
940   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
941   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
942 
943   /* subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
945 
946   /* copy x into the vec slvec0 */
947   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
950   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
951 
952   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
955 
956   /* supperdiagonal part */
957   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
958 
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
964 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968   PetscInt       nt;
969 
970   PetscFunctionBegin;
971   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
972   if (nt != A->n) {
973     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
974   }
975   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
976   if (nt != A->m) {
977     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
978   }
979 
980   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
981   /* do diagonal part */
982   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
983   /* do supperdiagonal part */
984   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
985   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
986   /* do subdiagonal part */
987   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
988   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
989   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
990 
991   PetscFunctionReturn(0);
992 }
993 
994 #undef __FUNCT__
995 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
996 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
997 {
998   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
999   PetscErrorCode ierr;
1000   PetscInt       mbs=a->mbs,bs=A->bs;
1001   PetscScalar    *x,*from,zero=0.0;
1002 
1003   PetscFunctionBegin;
1004   /*
1005   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1006   PetscSynchronizedFlush(A->comm);
1007   */
1008   /* diagonal part */
1009   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1010   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1011 
1012   /* subdiagonal part */
1013   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1014 
1015   /* copy x into the vec slvec0 */
1016   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1017   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1018   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1019   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1020 
1021   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1022   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1023   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1024 
1025   /* supperdiagonal part */
1026   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1027 
1028   PetscFunctionReturn(0);
1029 }
1030 
1031 #undef __FUNCT__
1032 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1033 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1034 {
1035   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1036   PetscErrorCode ierr;
1037 
1038   PetscFunctionBegin;
1039   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1040   /* do diagonal part */
1041   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1042   /* do supperdiagonal part */
1043   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1044   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1045 
1046   /* do subdiagonal part */
1047   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1048   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1049   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1050 
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1056 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1057 {
1058   PetscErrorCode ierr;
1059 
1060   PetscFunctionBegin;
1061   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1062   PetscFunctionReturn(0);
1063 }
1064 
1065 #undef __FUNCT__
1066 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1067 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1068 {
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 /*
1077   This only works correctly for square matrices where the subblock A->A is the
1078    diagonal block
1079 */
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1082 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1083 {
1084   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1085   PetscErrorCode ierr;
1086 
1087   PetscFunctionBegin;
1088   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1089   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1090   PetscFunctionReturn(0);
1091 }
1092 
1093 #undef __FUNCT__
1094 #define __FUNCT__ "MatScale_MPISBAIJ"
1095 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1096 {
1097   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1098   PetscErrorCode ierr;
1099 
1100   PetscFunctionBegin;
1101   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1102   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1103   PetscFunctionReturn(0);
1104 }
1105 
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1108 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1109 {
1110   PetscFunctionBegin;
1111   SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1117 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1118 {
1119   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1120 
1121   PetscFunctionBegin;
1122   if (!baij->getrowactive) {
1123     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1124   }
1125   baij->getrowactive = PETSC_FALSE;
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1131 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1132 {
1133   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1134   PetscErrorCode ierr;
1135 
1136   PetscFunctionBegin;
1137   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1138   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1144 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1147   Mat            A = a->A,B = a->B;
1148   PetscErrorCode ierr;
1149   PetscReal      isend[5],irecv[5];
1150 
1151   PetscFunctionBegin;
1152   info->block_size     = (PetscReal)matin->bs;
1153   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1154   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1155   isend[3] = info->memory;  isend[4] = info->mallocs;
1156   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1158   isend[3] += info->memory;  isend[4] += info->mallocs;
1159   if (flag == MAT_LOCAL) {
1160     info->nz_used      = isend[0];
1161     info->nz_allocated = isend[1];
1162     info->nz_unneeded  = isend[2];
1163     info->memory       = isend[3];
1164     info->mallocs      = isend[4];
1165   } else if (flag == MAT_GLOBAL_MAX) {
1166     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1167     info->nz_used      = irecv[0];
1168     info->nz_allocated = irecv[1];
1169     info->nz_unneeded  = irecv[2];
1170     info->memory       = irecv[3];
1171     info->mallocs      = irecv[4];
1172   } else if (flag == MAT_GLOBAL_SUM) {
1173     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1174     info->nz_used      = irecv[0];
1175     info->nz_allocated = irecv[1];
1176     info->nz_unneeded  = irecv[2];
1177     info->memory       = irecv[3];
1178     info->mallocs      = irecv[4];
1179   } else {
1180     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1181   }
1182   info->rows_global       = (PetscReal)A->M;
1183   info->columns_global    = (PetscReal)A->N;
1184   info->rows_local        = (PetscReal)A->m;
1185   info->columns_local     = (PetscReal)A->N;
1186   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1187   info->fill_ratio_needed = 0;
1188   info->factor_mallocs    = 0;
1189   PetscFunctionReturn(0);
1190 }
1191 
1192 #undef __FUNCT__
1193 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1194 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1195 {
1196   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1197   PetscErrorCode ierr;
1198 
1199   PetscFunctionBegin;
1200   switch (op) {
1201   case MAT_NO_NEW_NONZERO_LOCATIONS:
1202   case MAT_YES_NEW_NONZERO_LOCATIONS:
1203   case MAT_COLUMNS_UNSORTED:
1204   case MAT_COLUMNS_SORTED:
1205   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1206   case MAT_KEEP_ZEROED_ROWS:
1207   case MAT_NEW_NONZERO_LOCATION_ERR:
1208     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1209     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1210     break;
1211   case MAT_ROW_ORIENTED:
1212     a->roworiented = PETSC_TRUE;
1213     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1214     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1215     break;
1216   case MAT_ROWS_SORTED:
1217   case MAT_ROWS_UNSORTED:
1218   case MAT_YES_NEW_DIAGONALS:
1219     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1220     break;
1221   case MAT_COLUMN_ORIENTED:
1222     a->roworiented = PETSC_FALSE;
1223     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1224     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1225     break;
1226   case MAT_IGNORE_OFF_PROC_ENTRIES:
1227     a->donotstash = PETSC_TRUE;
1228     break;
1229   case MAT_NO_NEW_DIAGONALS:
1230     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1231   case MAT_USE_HASH_TABLE:
1232     a->ht_flag = PETSC_TRUE;
1233     break;
1234   case MAT_NOT_SYMMETRIC:
1235   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1236   case MAT_HERMITIAN:
1237     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1238   case MAT_SYMMETRIC:
1239   case MAT_STRUCTURALLY_SYMMETRIC:
1240   case MAT_NOT_HERMITIAN:
1241   case MAT_SYMMETRY_ETERNAL:
1242   case MAT_NOT_SYMMETRY_ETERNAL:
1243     break;
1244   default:
1245     SETERRQ(PETSC_ERR_SUP,"unknown option");
1246   }
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1252 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1253 {
1254   PetscErrorCode ierr;
1255   PetscFunctionBegin;
1256   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1257   PetscFunctionReturn(0);
1258 }
1259 
1260 #undef __FUNCT__
1261 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1262 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1263 {
1264   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1265   Mat            a=baij->A, b=baij->B;
1266   PetscErrorCode ierr;
1267   PetscInt       nv,m,n;
1268   PetscTruth     flg;
1269 
1270   PetscFunctionBegin;
1271   if (ll != rr){
1272     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1273     if (!flg)
1274       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1275   }
1276   if (!ll) PetscFunctionReturn(0);
1277 
1278   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1279   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1280 
1281   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1282   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1283 
1284   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1285 
1286   /* left diagonalscale the off-diagonal part */
1287   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1288 
1289   /* scale the diagonal part */
1290   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1291 
1292   /* right diagonalscale the off-diagonal part */
1293   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1294   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1300 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1301 {
1302   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1303   MPI_Comm          comm = A->comm;
1304   static PetscTruth called = PETSC_FALSE;
1305   PetscErrorCode    ierr;
1306 
1307   PetscFunctionBegin;
1308   if (!a->rank) {
1309     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1310   }
1311   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1312   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1313   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 #undef __FUNCT__
1318 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1319 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1320 {
1321   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1322   PetscErrorCode ierr;
1323 
1324   PetscFunctionBegin;
1325   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1326   PetscFunctionReturn(0);
1327 }
1328 
1329 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1330 
1331 #undef __FUNCT__
1332 #define __FUNCT__ "MatEqual_MPISBAIJ"
1333 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1334 {
1335   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1336   Mat            a,b,c,d;
1337   PetscTruth     flg;
1338   PetscErrorCode ierr;
1339 
1340   PetscFunctionBegin;
1341   a = matA->A; b = matA->B;
1342   c = matB->A; d = matB->B;
1343 
1344   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1345   if (flg) {
1346     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1347   }
1348   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1349   PetscFunctionReturn(0);
1350 }
1351 
1352 #undef __FUNCT__
1353 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1354 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1355 {
1356   PetscErrorCode ierr;
1357 
1358   PetscFunctionBegin;
1359   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1360   PetscFunctionReturn(0);
1361 }
1362 
1363 #undef __FUNCT__
1364 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1365 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1366 {
1367   PetscErrorCode ierr;
1368   PetscInt       i;
1369   PetscTruth     flg;
1370 
1371   PetscFunctionBegin;
1372   for (i=0; i<n; i++) {
1373     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1374     if (!flg) {
1375       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1376     }
1377   }
1378   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1379   PetscFunctionReturn(0);
1380 }
1381 
1382 
1383 /* -------------------------------------------------------------------*/
1384 static struct _MatOps MatOps_Values = {
1385        MatSetValues_MPISBAIJ,
1386        MatGetRow_MPISBAIJ,
1387        MatRestoreRow_MPISBAIJ,
1388        MatMult_MPISBAIJ,
1389 /* 4*/ MatMultAdd_MPISBAIJ,
1390        MatMultTranspose_MPISBAIJ,
1391        MatMultTransposeAdd_MPISBAIJ,
1392        0,
1393        0,
1394        0,
1395 /*10*/ 0,
1396        0,
1397        0,
1398        MatRelax_MPISBAIJ,
1399        MatTranspose_MPISBAIJ,
1400 /*15*/ MatGetInfo_MPISBAIJ,
1401        MatEqual_MPISBAIJ,
1402        MatGetDiagonal_MPISBAIJ,
1403        MatDiagonalScale_MPISBAIJ,
1404        MatNorm_MPISBAIJ,
1405 /*20*/ MatAssemblyBegin_MPISBAIJ,
1406        MatAssemblyEnd_MPISBAIJ,
1407        0,
1408        MatSetOption_MPISBAIJ,
1409        MatZeroEntries_MPISBAIJ,
1410 /*25*/ 0,
1411        0,
1412        0,
1413        0,
1414        0,
1415 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1416        0,
1417        0,
1418        0,
1419        0,
1420 /*35*/ MatDuplicate_MPISBAIJ,
1421        0,
1422        0,
1423        0,
1424        0,
1425 /*40*/ 0,
1426        MatGetSubMatrices_MPISBAIJ,
1427        MatIncreaseOverlap_MPISBAIJ,
1428        MatGetValues_MPISBAIJ,
1429        0,
1430 /*45*/ MatPrintHelp_MPISBAIJ,
1431        MatScale_MPISBAIJ,
1432        0,
1433        0,
1434        0,
1435 /*50*/ 0,
1436        0,
1437        0,
1438        0,
1439        0,
1440 /*55*/ 0,
1441        0,
1442        MatSetUnfactored_MPISBAIJ,
1443        0,
1444        MatSetValuesBlocked_MPISBAIJ,
1445 /*60*/ 0,
1446        0,
1447        0,
1448        MatGetPetscMaps_Petsc,
1449        0,
1450 /*65*/ 0,
1451        0,
1452        0,
1453        0,
1454        0,
1455 /*70*/ MatGetRowMax_MPISBAIJ,
1456        0,
1457        0,
1458        0,
1459        0,
1460 /*75*/ 0,
1461        0,
1462        0,
1463        0,
1464        0,
1465 /*80*/ 0,
1466        0,
1467        0,
1468        0,
1469        MatLoad_MPISBAIJ,
1470 /*85*/ 0,
1471        0,
1472        0,
1473        0,
1474        0,
1475 /*90*/ 0,
1476        0,
1477        0,
1478        0,
1479        0,
1480 /*95*/ 0,
1481        0,
1482        0,
1483        0};
1484 
1485 
1486 EXTERN_C_BEGIN
1487 #undef __FUNCT__
1488 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1489 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1490 {
1491   PetscFunctionBegin;
1492   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1493   *iscopy = PETSC_FALSE;
1494   PetscFunctionReturn(0);
1495 }
1496 EXTERN_C_END
1497 
1498 EXTERN_C_BEGIN
1499 #undef __FUNCT__
1500 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1501 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1502 {
1503   Mat_MPISBAIJ   *b;
1504   PetscErrorCode ierr;
1505   PetscInt       i,mbs,Mbs;
1506 
1507   PetscFunctionBegin;
1508   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1509 
1510   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1511   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1512   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1513   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1514   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1515   if (d_nnz) {
1516     for (i=0; i<B->m/bs; i++) {
1517       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1518     }
1519   }
1520   if (o_nnz) {
1521     for (i=0; i<B->m/bs; i++) {
1522       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1523     }
1524   }
1525   B->preallocated = PETSC_TRUE;
1526   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1527   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1528   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1529   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1530 
1531   b   = (Mat_MPISBAIJ*)B->data;
1532   mbs = B->m/bs;
1533   Mbs = B->M/bs;
1534   if (mbs*bs != B->m) {
1535     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1536   }
1537 
1538   B->bs  = bs;
1539   b->bs2 = bs*bs;
1540   b->mbs = mbs;
1541   b->nbs = mbs;
1542   b->Mbs = Mbs;
1543   b->Nbs = Mbs;
1544 
1545   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1546   b->rowners[0]    = 0;
1547   for (i=2; i<=b->size; i++) {
1548     b->rowners[i] += b->rowners[i-1];
1549   }
1550   b->rstart    = b->rowners[b->rank];
1551   b->rend      = b->rowners[b->rank+1];
1552   b->cstart    = b->rstart;
1553   b->cend      = b->rend;
1554   for (i=0; i<=b->size; i++) {
1555     b->rowners_bs[i] = b->rowners[i]*bs;
1556   }
1557   b->rstart_bs = b-> rstart*bs;
1558   b->rend_bs   = b->rend*bs;
1559 
1560   b->cstart_bs = b->cstart*bs;
1561   b->cend_bs   = b->cend*bs;
1562 
1563   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1564   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1565   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1566   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1567 
1568   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1569   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1570   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1571   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1572 
1573   /* build cache for off array entries formed */
1574   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1575 
1576   PetscFunctionReturn(0);
1577 }
1578 EXTERN_C_END
1579 
1580 /*MC
1581    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1582    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1583 
1584    Options Database Keys:
1585 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1586 
1587   Level: beginner
1588 
1589 .seealso: MatCreateMPISBAIJ
1590 M*/
1591 
1592 EXTERN_C_BEGIN
1593 #undef __FUNCT__
1594 #define __FUNCT__ "MatCreate_MPISBAIJ"
1595 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1596 {
1597   Mat_MPISBAIJ   *b;
1598   PetscErrorCode ierr;
1599   PetscTruth     flg;
1600 
1601   PetscFunctionBegin;
1602 
1603   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1604   B->data = (void*)b;
1605   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1606 
1607   B->ops->destroy    = MatDestroy_MPISBAIJ;
1608   B->ops->view       = MatView_MPISBAIJ;
1609   B->mapping    = 0;
1610   B->factor     = 0;
1611   B->assembled  = PETSC_FALSE;
1612 
1613   B->insertmode = NOT_SET_VALUES;
1614   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1615   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1616 
1617   /* build local table of row and column ownerships */
1618   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1619   b->cowners    = b->rowners + b->size + 2;
1620   b->rowners_bs = b->cowners + b->size + 2;
1621   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1622 
1623   /* build cache for off array entries formed */
1624   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1625   b->donotstash  = PETSC_FALSE;
1626   b->colmap      = PETSC_NULL;
1627   b->garray      = PETSC_NULL;
1628   b->roworiented = PETSC_TRUE;
1629 
1630 #if defined(PETSC_USE_MAT_SINGLE)
1631   /* stuff for MatSetValues_XXX in single precision */
1632   b->setvalueslen     = 0;
1633   b->setvaluescopy    = PETSC_NULL;
1634 #endif
1635 
1636   /* stuff used in block assembly */
1637   b->barray       = 0;
1638 
1639   /* stuff used for matrix vector multiply */
1640   b->lvec         = 0;
1641   b->Mvctx        = 0;
1642   b->slvec0       = 0;
1643   b->slvec0b      = 0;
1644   b->slvec1       = 0;
1645   b->slvec1a      = 0;
1646   b->slvec1b      = 0;
1647   b->sMvctx       = 0;
1648 
1649   /* stuff for MatGetRow() */
1650   b->rowindices   = 0;
1651   b->rowvalues    = 0;
1652   b->getrowactive = PETSC_FALSE;
1653 
1654   /* hash table stuff */
1655   b->ht           = 0;
1656   b->hd           = 0;
1657   b->ht_size      = 0;
1658   b->ht_flag      = PETSC_FALSE;
1659   b->ht_fact      = 0;
1660   b->ht_total_ct  = 0;
1661   b->ht_insert_ct = 0;
1662 
1663   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1664   if (flg) {
1665     PetscReal fact = 1.39;
1666     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1667     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1668     if (fact <= 1.0) fact = 1.39;
1669     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1670     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1671   }
1672   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1673                                      "MatStoreValues_MPISBAIJ",
1674                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1676                                      "MatRetrieveValues_MPISBAIJ",
1677                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1678   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1679                                      "MatGetDiagonalBlock_MPISBAIJ",
1680                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1682                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1683                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1684   B->symmetric                  = PETSC_TRUE;
1685   B->structurally_symmetric     = PETSC_TRUE;
1686   B->symmetric_set              = PETSC_TRUE;
1687   B->structurally_symmetric_set = PETSC_TRUE;
1688   PetscFunctionReturn(0);
1689 }
1690 EXTERN_C_END
1691 
1692 /*MC
1693    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1694 
1695    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1696    and MATMPISBAIJ otherwise.
1697 
1698    Options Database Keys:
1699 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1700 
1701   Level: beginner
1702 
1703 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1704 M*/
1705 
1706 EXTERN_C_BEGIN
1707 #undef __FUNCT__
1708 #define __FUNCT__ "MatCreate_SBAIJ"
1709 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1710 {
1711   PetscErrorCode ierr;
1712   PetscMPIInt    size;
1713 
1714   PetscFunctionBegin;
1715   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1716   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1717   if (size == 1) {
1718     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1719   } else {
1720     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1721   }
1722   PetscFunctionReturn(0);
1723 }
1724 EXTERN_C_END
1725 
1726 #undef __FUNCT__
1727 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1728 /*@C
1729    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1730    the user should preallocate the matrix storage by setting the parameters
1731    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1732    performance can be increased by more than a factor of 50.
1733 
1734    Collective on Mat
1735 
1736    Input Parameters:
1737 +  A - the matrix
1738 .  bs   - size of blockk
1739 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1740            submatrix  (same for all local rows)
1741 .  d_nnz - array containing the number of block nonzeros in the various block rows
1742            in the upper triangular and diagonal part of the in diagonal portion of the local
1743            (possibly different for each block row) or PETSC_NULL.  You must leave room
1744            for the diagonal entry even if it is zero.
1745 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1746            submatrix (same for all local rows).
1747 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1748            off-diagonal portion of the local submatrix (possibly different for
1749            each block row) or PETSC_NULL.
1750 
1751 
1752    Options Database Keys:
1753 .   -mat_no_unroll - uses code that does not unroll the loops in the
1754                      block calculations (much slower)
1755 .   -mat_block_size - size of the blocks to use
1756 
1757    Notes:
1758 
1759    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1760    than it must be used on all processors that share the object for that argument.
1761 
1762    If the *_nnz parameter is given then the *_nz parameter is ignored
1763 
1764    Storage Information:
1765    For a square global matrix we define each processor's diagonal portion
1766    to be its local rows and the corresponding columns (a square submatrix);
1767    each processor's off-diagonal portion encompasses the remainder of the
1768    local matrix (a rectangular submatrix).
1769 
1770    The user can specify preallocated storage for the diagonal part of
1771    the local submatrix with either d_nz or d_nnz (not both).  Set
1772    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1773    memory allocation.  Likewise, specify preallocated storage for the
1774    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1775 
1776    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1777    the figure below we depict these three local rows and all columns (0-11).
1778 
1779 .vb
1780            0 1 2 3 4 5 6 7 8 9 10 11
1781           -------------------
1782    row 3  |  o o o d d d o o o o o o
1783    row 4  |  o o o d d d o o o o o o
1784    row 5  |  o o o d d d o o o o o o
1785           -------------------
1786 .ve
1787 
1788    Thus, any entries in the d locations are stored in the d (diagonal)
1789    submatrix, and any entries in the o locations are stored in the
1790    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1791    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1792 
1793    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1794    plus the diagonal part of the d matrix,
1795    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1796    In general, for PDE problems in which most nonzeros are near the diagonal,
1797    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1798    or you will get TERRIBLE performance; see the users' manual chapter on
1799    matrices.
1800 
1801    Level: intermediate
1802 
1803 .keywords: matrix, block, aij, compressed row, sparse, parallel
1804 
1805 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1806 @*/
1807 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1808 {
1809   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1810 
1811   PetscFunctionBegin;
1812   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1813   if (f) {
1814     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 
1819 #undef __FUNCT__
1820 #define __FUNCT__ "MatCreateMPISBAIJ"
1821 /*@C
1822    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1823    (block compressed row).  For good matrix assembly performance
1824    the user should preallocate the matrix storage by setting the parameters
1825    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1826    performance can be increased by more than a factor of 50.
1827 
1828    Collective on MPI_Comm
1829 
1830    Input Parameters:
1831 +  comm - MPI communicator
1832 .  bs   - size of blockk
1833 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1834            This value should be the same as the local size used in creating the
1835            y vector for the matrix-vector product y = Ax.
1836 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1837            This value should be the same as the local size used in creating the
1838            x vector for the matrix-vector product y = Ax.
1839 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1840 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1841 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1842            submatrix  (same for all local rows)
1843 .  d_nnz - array containing the number of block nonzeros in the various block rows
1844            in the upper triangular portion of the in diagonal portion of the local
1845            (possibly different for each block block row) or PETSC_NULL.
1846            You must leave room for the diagonal entry even if it is zero.
1847 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1848            submatrix (same for all local rows).
1849 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1850            off-diagonal portion of the local submatrix (possibly different for
1851            each block row) or PETSC_NULL.
1852 
1853    Output Parameter:
1854 .  A - the matrix
1855 
1856    Options Database Keys:
1857 .   -mat_no_unroll - uses code that does not unroll the loops in the
1858                      block calculations (much slower)
1859 .   -mat_block_size - size of the blocks to use
1860 .   -mat_mpi - use the parallel matrix data structures even on one processor
1861                (defaults to using SeqBAIJ format on one processor)
1862 
1863    Notes:
1864    The number of rows and columns must be divisible by blocksize.
1865 
1866    The user MUST specify either the local or global matrix dimensions
1867    (possibly both).
1868 
1869    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1870    than it must be used on all processors that share the object for that argument.
1871 
1872    If the *_nnz parameter is given then the *_nz parameter is ignored
1873 
1874    Storage Information:
1875    For a square global matrix we define each processor's diagonal portion
1876    to be its local rows and the corresponding columns (a square submatrix);
1877    each processor's off-diagonal portion encompasses the remainder of the
1878    local matrix (a rectangular submatrix).
1879 
1880    The user can specify preallocated storage for the diagonal part of
1881    the local submatrix with either d_nz or d_nnz (not both).  Set
1882    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1883    memory allocation.  Likewise, specify preallocated storage for the
1884    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1885 
1886    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1887    the figure below we depict these three local rows and all columns (0-11).
1888 
1889 .vb
1890            0 1 2 3 4 5 6 7 8 9 10 11
1891           -------------------
1892    row 3  |  o o o d d d o o o o o o
1893    row 4  |  o o o d d d o o o o o o
1894    row 5  |  o o o d d d o o o o o o
1895           -------------------
1896 .ve
1897 
1898    Thus, any entries in the d locations are stored in the d (diagonal)
1899    submatrix, and any entries in the o locations are stored in the
1900    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1901    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1902 
1903    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1904    plus the diagonal part of the d matrix,
1905    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1906    In general, for PDE problems in which most nonzeros are near the diagonal,
1907    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1908    or you will get TERRIBLE performance; see the users' manual chapter on
1909    matrices.
1910 
1911    Level: intermediate
1912 
1913 .keywords: matrix, block, aij, compressed row, sparse, parallel
1914 
1915 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1916 @*/
1917 
1918 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1919 {
1920   PetscErrorCode ierr;
1921   PetscMPIInt    size;
1922 
1923   PetscFunctionBegin;
1924   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1925   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1926   if (size > 1) {
1927     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1928     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1929   } else {
1930     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1931     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1932   }
1933   PetscFunctionReturn(0);
1934 }
1935 
1936 
1937 #undef __FUNCT__
1938 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1939 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1940 {
1941   Mat            mat;
1942   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1943   PetscErrorCode ierr;
1944   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1945   PetscScalar    *array;
1946 
1947   PetscFunctionBegin;
1948   *newmat       = 0;
1949   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1950   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1951   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1952 
1953   mat->factor       = matin->factor;
1954   mat->preallocated = PETSC_TRUE;
1955   mat->assembled    = PETSC_TRUE;
1956   mat->insertmode   = NOT_SET_VALUES;
1957 
1958   a = (Mat_MPISBAIJ*)mat->data;
1959   mat->bs  = matin->bs;
1960   a->bs2   = oldmat->bs2;
1961   a->mbs   = oldmat->mbs;
1962   a->nbs   = oldmat->nbs;
1963   a->Mbs   = oldmat->Mbs;
1964   a->Nbs   = oldmat->Nbs;
1965 
1966   a->rstart       = oldmat->rstart;
1967   a->rend         = oldmat->rend;
1968   a->cstart       = oldmat->cstart;
1969   a->cend         = oldmat->cend;
1970   a->size         = oldmat->size;
1971   a->rank         = oldmat->rank;
1972   a->donotstash   = oldmat->donotstash;
1973   a->roworiented  = oldmat->roworiented;
1974   a->rowindices   = 0;
1975   a->rowvalues    = 0;
1976   a->getrowactive = PETSC_FALSE;
1977   a->barray       = 0;
1978   a->rstart_bs    = oldmat->rstart_bs;
1979   a->rend_bs      = oldmat->rend_bs;
1980   a->cstart_bs    = oldmat->cstart_bs;
1981   a->cend_bs      = oldmat->cend_bs;
1982 
1983   /* hash table stuff */
1984   a->ht           = 0;
1985   a->hd           = 0;
1986   a->ht_size      = 0;
1987   a->ht_flag      = oldmat->ht_flag;
1988   a->ht_fact      = oldmat->ht_fact;
1989   a->ht_total_ct  = 0;
1990   a->ht_insert_ct = 0;
1991 
1992   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1993   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1994   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1995   if (oldmat->colmap) {
1996 #if defined (PETSC_USE_CTABLE)
1997     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1998 #else
1999     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2000     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2001     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2002 #endif
2003   } else a->colmap = 0;
2004 
2005   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2006     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2007     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2008     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2009   } else a->garray = 0;
2010 
2011   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2012   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2013   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2014   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2015 
2016   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2017   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2018   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2019   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2020 
2021   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2022   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2023   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2024   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2025   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2026   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2027   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2028   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2029   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2030   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2031   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2032   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2033   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2034 
2035   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2036   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2037   a->sMvctx = oldmat->sMvctx;
2038   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2039 
2040   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2041   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2042   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2043   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2044   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2045   *newmat = mat;
2046   PetscFunctionReturn(0);
2047 }
2048 
2049 #include "petscsys.h"
2050 
2051 #undef __FUNCT__
2052 #define __FUNCT__ "MatLoad_MPISBAIJ"
2053 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2054 {
2055   Mat            A;
2056   PetscErrorCode ierr;
2057   PetscInt       i,nz,j,rstart,rend;
2058   PetscScalar    *vals,*buf;
2059   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2060   MPI_Status     status;
2061   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2062   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2063   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2064   PetscInt       bs=1,Mbs,mbs,extra_rows;
2065   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2066   PetscInt       dcount,kmax,k,nzcount,tmp;
2067   int            fd;
2068 
2069   PetscFunctionBegin;
2070   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2071 
2072   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2073   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2074   if (!rank) {
2075     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2076     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2077     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2078     if (header[3] < 0) {
2079       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2080     }
2081   }
2082 
2083   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2084   M = header[1]; N = header[2];
2085 
2086   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2087 
2088   /*
2089      This code adds extra rows to make sure the number of rows is
2090      divisible by the blocksize
2091   */
2092   Mbs        = M/bs;
2093   extra_rows = bs - M + bs*(Mbs);
2094   if (extra_rows == bs) extra_rows = 0;
2095   else                  Mbs++;
2096   if (extra_rows &&!rank) {
2097     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2098   }
2099 
2100   /* determine ownership of all rows */
2101   mbs        = Mbs/size + ((Mbs % size) > rank);
2102   m          = mbs*bs;
2103   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2104   browners   = rowners + size + 1;
2105   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2106   rowners[0] = 0;
2107   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2108   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2109   rstart = rowners[rank];
2110   rend   = rowners[rank+1];
2111 
2112   /* distribute row lengths to all processors */
2113   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2114   if (!rank) {
2115     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2116     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2117     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2118     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2119     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2120     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2121     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2122   } else {
2123     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2124   }
2125 
2126   if (!rank) {   /* procs[0] */
2127     /* calculate the number of nonzeros on each processor */
2128     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2129     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2130     for (i=0; i<size; i++) {
2131       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2132         procsnz[i] += rowlengths[j];
2133       }
2134     }
2135     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2136 
2137     /* determine max buffer needed and allocate it */
2138     maxnz = 0;
2139     for (i=0; i<size; i++) {
2140       maxnz = PetscMax(maxnz,procsnz[i]);
2141     }
2142     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2143 
2144     /* read in my part of the matrix column indices  */
2145     nz     = procsnz[0];
2146     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2147     mycols = ibuf;
2148     if (size == 1)  nz -= extra_rows;
2149     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2150     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2151 
2152     /* read in every ones (except the last) and ship off */
2153     for (i=1; i<size-1; i++) {
2154       nz   = procsnz[i];
2155       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2156       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2157     }
2158     /* read in the stuff for the last proc */
2159     if (size != 1) {
2160       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2161       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2162       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2163       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2164     }
2165     ierr = PetscFree(cols);CHKERRQ(ierr);
2166   } else {  /* procs[i], i>0 */
2167     /* determine buffer space needed for message */
2168     nz = 0;
2169     for (i=0; i<m; i++) {
2170       nz += locrowlens[i];
2171     }
2172     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2173     mycols = ibuf;
2174     /* receive message of column indices*/
2175     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2176     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2177     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2178   }
2179 
2180   /* loop over local rows, determining number of off diagonal entries */
2181   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2182   odlens   = dlens + (rend-rstart);
2183   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2184   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2185   masked1  = mask    + Mbs;
2186   masked2  = masked1 + Mbs;
2187   rowcount = 0; nzcount = 0;
2188   for (i=0; i<mbs; i++) {
2189     dcount  = 0;
2190     odcount = 0;
2191     for (j=0; j<bs; j++) {
2192       kmax = locrowlens[rowcount];
2193       for (k=0; k<kmax; k++) {
2194         tmp = mycols[nzcount++]/bs; /* block col. index */
2195         if (!mask[tmp]) {
2196           mask[tmp] = 1;
2197           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2198           else masked1[dcount++] = tmp; /* entry in diag portion */
2199         }
2200       }
2201       rowcount++;
2202     }
2203 
2204     dlens[i]  = dcount;  /* d_nzz[i] */
2205     odlens[i] = odcount; /* o_nzz[i] */
2206 
2207     /* zero out the mask elements we set */
2208     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2209     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2210   }
2211 
2212   /* create our matrix */
2213   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2214   ierr = MatSetType(A,type);CHKERRQ(ierr);
2215   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2216   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2217 
2218   if (!rank) {
2219     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2220     /* read in my part of the matrix numerical values  */
2221     nz = procsnz[0];
2222     vals = buf;
2223     mycols = ibuf;
2224     if (size == 1)  nz -= extra_rows;
2225     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2226     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2227 
2228     /* insert into matrix */
2229     jj      = rstart*bs;
2230     for (i=0; i<m; i++) {
2231       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2232       mycols += locrowlens[i];
2233       vals   += locrowlens[i];
2234       jj++;
2235     }
2236 
2237     /* read in other processors (except the last one) and ship out */
2238     for (i=1; i<size-1; i++) {
2239       nz   = procsnz[i];
2240       vals = buf;
2241       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2242       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2243     }
2244     /* the last proc */
2245     if (size != 1){
2246       nz   = procsnz[i] - extra_rows;
2247       vals = buf;
2248       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2249       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2250       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2251     }
2252     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2253 
2254   } else {
2255     /* receive numeric values */
2256     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2257 
2258     /* receive message of values*/
2259     vals   = buf;
2260     mycols = ibuf;
2261     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2262     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2263     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2264 
2265     /* insert into matrix */
2266     jj      = rstart*bs;
2267     for (i=0; i<m; i++) {
2268       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2269       mycols += locrowlens[i];
2270       vals   += locrowlens[i];
2271       jj++;
2272     }
2273   }
2274 
2275   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2276   ierr = PetscFree(buf);CHKERRQ(ierr);
2277   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2278   ierr = PetscFree(rowners);CHKERRQ(ierr);
2279   ierr = PetscFree(dlens);CHKERRQ(ierr);
2280   ierr = PetscFree(mask);CHKERRQ(ierr);
2281   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2282   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2283   *newmat = A;
2284   PetscFunctionReturn(0);
2285 }
2286 
2287 #undef __FUNCT__
2288 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2289 /*XXXXX@
2290    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2291 
2292    Input Parameters:
2293 .  mat  - the matrix
2294 .  fact - factor
2295 
2296    Collective on Mat
2297 
2298    Level: advanced
2299 
2300   Notes:
2301    This can also be set by the command line option: -mat_use_hash_table fact
2302 
2303 .keywords: matrix, hashtable, factor, HT
2304 
2305 .seealso: MatSetOption()
2306 @XXXXX*/
2307 
2308 
2309 #undef __FUNCT__
2310 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2311 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2312 {
2313   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2314   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2315   PetscReal      atmp;
2316   PetscReal      *work,*svalues,*rvalues;
2317   PetscErrorCode ierr;
2318   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2319   PetscMPIInt    rank,size;
2320   PetscInt       *rowners_bs,dest,count,source;
2321   PetscScalar    *va;
2322   MatScalar      *ba;
2323   MPI_Status     stat;
2324 
2325   PetscFunctionBegin;
2326   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2327   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2328 
2329   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2330   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2331 
2332   bs   = A->bs;
2333   mbs  = a->mbs;
2334   Mbs  = a->Mbs;
2335   ba   = b->a;
2336   bi   = b->i;
2337   bj   = b->j;
2338 
2339   /* find ownerships */
2340   rowners_bs = a->rowners_bs;
2341 
2342   /* each proc creates an array to be distributed */
2343   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2344   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2345 
2346   /* row_max for B */
2347   if (rank != size-1){
2348     for (i=0; i<mbs; i++) {
2349       ncols = bi[1] - bi[0]; bi++;
2350       brow  = bs*i;
2351       for (j=0; j<ncols; j++){
2352         bcol = bs*(*bj);
2353         for (kcol=0; kcol<bs; kcol++){
2354           col = bcol + kcol;                 /* local col index */
2355           col += rowners_bs[rank+1];      /* global col index */
2356           for (krow=0; krow<bs; krow++){
2357             atmp = PetscAbsScalar(*ba); ba++;
2358             row = brow + krow;    /* local row index */
2359             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2360             if (work[col] < atmp) work[col] = atmp;
2361           }
2362         }
2363         bj++;
2364       }
2365     }
2366 
2367     /* send values to its owners */
2368     for (dest=rank+1; dest<size; dest++){
2369       svalues = work + rowners_bs[dest];
2370       count   = rowners_bs[dest+1]-rowners_bs[dest];
2371       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2372     }
2373   }
2374 
2375   /* receive values */
2376   if (rank){
2377     rvalues = work;
2378     count   = rowners_bs[rank+1]-rowners_bs[rank];
2379     for (source=0; source<rank; source++){
2380       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2381       /* process values */
2382       for (i=0; i<count; i++){
2383         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2384       }
2385     }
2386   }
2387 
2388   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2389   ierr = PetscFree(work);CHKERRQ(ierr);
2390   PetscFunctionReturn(0);
2391 }
2392 
2393 #undef __FUNCT__
2394 #define __FUNCT__ "MatRelax_MPISBAIJ"
2395 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2396 {
2397   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2398   PetscErrorCode ierr;
2399   PetscInt       mbs=mat->mbs,bs=matin->bs;
2400   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2401   Vec            bb1;
2402 
2403   PetscFunctionBegin;
2404   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2405   if (bs > 1)
2406     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2407 
2408   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2409     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2410       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2411       its--;
2412     }
2413 
2414     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2415     while (its--){
2416 
2417       /* lower triangular part: slvec0b = - B^T*xx */
2418       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2419 
2420       /* copy xx into slvec0a */
2421       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2422       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2423       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2424       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2425 
2426       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2427 
2428       /* copy bb into slvec1a */
2429       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2430       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2431       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2432       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2433 
2434       /* set slvec1b = 0 */
2435       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2436 
2437       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2438       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2439       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2440       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2441 
2442       /* upper triangular part: bb1 = bb1 - B*x */
2443       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2444 
2445       /* local diagonal sweep */
2446       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2447     }
2448     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2449   } else {
2450     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2451   }
2452   PetscFunctionReturn(0);
2453 }
2454 
2455 #undef __FUNCT__
2456 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2457 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2458 {
2459   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2460   PetscErrorCode ierr;
2461   PetscScalar    mone=-1.0;
2462   Vec            lvec1,bb1;
2463 
2464   PetscFunctionBegin;
2465   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2466   if (matin->bs > 1)
2467     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2468 
2469   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2470     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2471       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2472       its--;
2473     }
2474 
2475     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2476     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2477     while (its--){
2478       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2479 
2480       /* lower diagonal part: bb1 = bb - B^T*xx */
2481       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2482       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2483 
2484       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2485       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2486       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2487 
2488       /* upper diagonal part: bb1 = bb1 - B*x */
2489       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2490       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2491 
2492       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2493 
2494       /* diagonal sweep */
2495       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2496     }
2497     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2498     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2499   } else {
2500     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2501   }
2502   PetscFunctionReturn(0);
2503 }
2504 
2505