
#include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
#include "mpisbaij.h"
#include "src/mat/impls/sbaij/seq/sbaij.h"

EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);

/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
   inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
   converts the entries to single precision and then calls ..._MatScalar() to insert them
   into the single precision data structures.
*/
#if defined(PETSC_USE_MAT_SINGLE)
EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
#else
#define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
#define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
#define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
#define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
#define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
#endif

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatStoreValues_MPISBAIJ"
PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
  ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END

EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
  ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
EXTERN_C_END
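
/*
   Usage sketch (illustrative only, not part of this file). MatStoreValues() and
   MatRetrieveValues() let a caller save and later restore the numerical values
   of a matrix whose nonzero pattern is frozen, e.g. when the same pattern is
   refilled at every nonlinear iteration. Assuming a MATMPISBAIJ matrix mat that
   has already been assembled once:

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      (copies the values of baij->A and baij->B)
     ... insert new values and assemble again ...
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   (restores the saved values)
*/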


#define CHUNKSIZE  10

#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr); \
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
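
/*
   The search step of the macro above, extracted as a plain function for clarity
   (an illustrative sketch, not used by this file): given the sorted block
   column indices rp[0..nrow-1] of one block row, locate bcol or the position
   where it would have to be inserted. The macro inlines exactly this bisection
   plus a short linear scan, then falls through to the insertion path when the
   column is absent.

     static PetscInt FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high-low > 3) {            (bisect until at most 3 candidates remain)
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {        (finish with a short linear scan)
         if (rp[i] >= bcol) break;
       }
       return i;                         (rp[i] == bcol if the column is present)
     }
*/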
#ifndef MatSetValues_SeqSBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
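
/*
   A worked note on the reallocation path shared by both macros above: a single
   PetscMalloc() of

     len = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar)) + (mbs+1)*sizeof(PetscInt)

   bytes is laid out as three consecutive arrays,

     new_a : new_nz*bs2 MatScalar    (the nonzero blocks)
     new_j : new_nz     PetscInt     (block column indices)
     new_i : mbs+1      PetscInt     (block row pointers)

   which is why singlemalloc is set to PETSC_TRUE afterwards: one PetscFree() of
   the values array later releases all three. Every overflow of a block row
   grows the storage by only CHUNKSIZE blocks and copies everything over, so
   generous preallocation avoids this path entirely.
*/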

#if defined(PETSC_USE_MAT_SINGLE)
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ"
PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;

  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       i,N = m*n*b->bs2;
  MatScalar      *vsingle;

  PetscFunctionBegin;
  if (N > b->setvalueslen) {
    if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
    ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
    b->setvalueslen  = N;
  }
  vsingle = b->setvaluescopy;
  for (i=0; i<N; i++) {
    vsingle[i] = v[i];
  }
  ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
#endif
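
/*
   Note on the wrappers above (PETSC_USE_MAT_SINGLE builds only): the public
   interface always receives PetscScalar values, so each call copies the m*n
   entries (m*n*bs2 for the blocked version) into the persistent MatScalar
   scratch array setvaluescopy, grown on demand and freed in
   MatDestroy_MPISBAIJ(), before forwarding to the _MatScalar kernels that
   write the single precision data structures.
*/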

/* Only add/insert a(i,j) with i<=j (blocks).
   Any a(i,j) with i>j input by the user is ignored.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  MatScalar      value;
  PetscTruth     roworiented = baij->roworiented;
  PetscErrorCode ierr;
  PetscInt       i,j,row,col;
  PetscInt       rstart_orig=baij->rstart_bs;
  PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
  PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;

  /* Some variables required by the macros */
  Mat            A = baij->A;
  Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
  PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
  MatScalar      *aa=a->a;

  Mat            B = baij->B;
  Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(B)->data;
  PetscInt       *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
  MatScalar      *ba=b->a;

  PetscInt       *rp,ii,nrow,_i,rmax,N,brow,bcol;
  PetscInt       low,high,t,ridx,cidx,bs2=a->bs2;
  MatScalar      *ap,*bap;

  /* for stash */
  PetscInt       n_loc, *in_loc=0;
  MatScalar      *v_loc=0;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
    ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
  }

  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
#endif
    if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
      row = im[i] - rstart_orig;              /* local row index */
      for (j=0; j<n; j++) {
        if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
        if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
          col = in[j] - cstart_orig;          /* local col index */
          brow = row/bs; bcol = col/bs;
          if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        } else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
#endif
        else {  /* off-diag entry (B) */
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
            col  = col - 1;
#else
            col = baij->colmap[in[j]/bs] - 1;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
              /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
              B = baij->B;
              b = (Mat_SeqBAIJ*)(B)->data;
              bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
              ba=b->a;
            } else col += in[j]%bs;
          } else col = in[j];
          if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
          MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
          /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
        }
      }
    } else {  /* off processor entry */
      if (!baij->donotstash) {
        n_loc = 0;
        for (j=0; j<n; j++){
          if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
          in_loc[n_loc] = in[j];
          if (roworiented) {
            v_loc[n_loc] = v[i*n+j];
          } else {
            v_loc[n_loc] = v[j*m+i];
          }
          n_loc++;
        }
        ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
      }
    }
  }

  if (!baij->donotstash) {
    ierr = PetscFree(in_loc);CHKERRQ(ierr);
    ierr = PetscFree(v_loc);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
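
/*
   Caller-side sketch (illustrative): only entries in the upper block triangle
   are kept by the routine above, so each process sets a row's diagonal and
   superdiagonal entries and relies on the symmetric counterparts being set
   when the earlier rows are processed. Assuming block size 1 and a locally
   owned global row i of a 1D Laplacian:

     PetscInt    cols[2];
     PetscScalar vals[2];
     cols[0] = i;   vals[0] =  2.0;
     cols[1] = i+1; vals[1] = -1.0;
     ierr = MatSetValues(mat,1,&i,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   Passing the subdiagonal entry (i,i-1) as well would be silently ignored.
*/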

#undef __FUNCT__
#define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
{
  Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
  const MatScalar *value;
  MatScalar       *barray=baij->barray;
  PetscTruth      roworiented = baij->roworiented;
  PetscErrorCode  ierr;
  PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
  PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
  PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;

  PetscFunctionBegin;
  if (!barray) {
    ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
    baij->barray = barray;
  }

  if (roworiented) {
    stepval = (n-1)*bs;
  } else {
    stepval = (m-1)*bs;
  }
  for (i=0; i<m; i++) {
    if (im[i] < 0) continue;
#if defined(PETSC_USE_DEBUG)
    if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
#endif
    if (im[i] >= rstart && im[i] < rend) {
      row = im[i] - rstart;
      for (j=0; j<n; j++) {
        /* If there is a single block column (or block row, column-oriented) then a copy is not required */
        if ((roworiented) && (n == 1)) {
          barray = (MatScalar*) v + i*bs2;
        } else if ((!roworiented) && (m == 1)) {
          barray = (MatScalar*) v + j*bs2;
        } else { /* Here a copy is required */
          if (roworiented) {
            value = v + i*(stepval+bs)*bs + j*bs;
          } else {
            value = v + j*(stepval+bs)*bs + i*bs;
          }
          for (ii=0; ii<bs; ii++,value+=stepval) {
            for (jj=0; jj<bs; jj++) {
              *barray++  = *value++;
            }
          }
          barray -=bs2;
        }

        if (in[j] >= cstart && in[j] < cend){
          col  = in[j] - cstart;
          ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
        else if (in[j] < 0) continue;
#if defined(PETSC_USE_DEBUG)
        else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
#endif
        else {
          if (mat->was_assembled) {
            if (!baij->colmap) {
              ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
            }

#if defined(PETSC_USE_DEBUG)
#if defined (PETSC_USE_CTABLE)
            { PetscInt data;
              ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
              if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
            }
#else
            if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
#endif
#endif
#if defined (PETSC_USE_CTABLE)
            ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
            col  = (col - 1)/bs;
#else
            col = (baij->colmap[in[j]] - 1)/bs;
#endif
            if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
              ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
              col =  in[j];
            }
          }
          else col = in[j];
          ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
        }
      }
    } else {
      if (!baij->donotstash) {
        if (roworiented) {
          ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        } else {
          ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
        }
      }
    }
  }
  PetscFunctionReturn(0);
}
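
/*
   Caller-side sketch (illustrative): for the blocked interface the indices are
   BLOCK indices and v holds complete bs x bs blocks. Assuming bs = 2, the
   default row-oriented ordering, and a locally owned block row brow with
   brow <= bcol (lower triangular blocks would be dropped):

     PetscScalar block[4];
     block[0] =  2.0; block[1] = -1.0;    (first row of the 2 x 2 block)
     block[2] = -1.0; block[3] =  2.0;    (second row)
     ierr = MatSetValuesBlocked(mat,1,&brow,1,&bcol,block,INSERT_VALUES);CHKERRQ(ierr);
*/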

#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data--;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          if ((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatNorm_MPISBAIJ"
PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscReal      sum[2],*lnorm2;

  PetscFunctionBegin;
  if (baij->size == 1) {
    ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
  } else {
    if (type == NORM_FROBENIUS) {
      ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
      ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
      ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
      *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
      ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
      *norm = sqrt(sum[0] + 2*sum[1]);
      ierr = PetscFree(lnorm2);CHKERRQ(ierr);
    } else {
      SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
    }
  }
  PetscFunctionReturn(0);
}
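
/*
   Why sum[0] + 2*sum[1] above (a worked note, assuming MatNorm() on the
   SeqSBAIJ part already accounts for the symmetric lower triangle of the
   diagonal block): every block of B appears in both the upper and the lower
   triangle of the assembled matrix, so

     norm(M)^2 = sum over processes of ( norm(A_p)^2 + 2*norm(B_p)^2 )

   which is exactly the MPI_SUM reduction of the two local squared norms.
*/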

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       nstash,reallocs;
  InsertMode     addv;

  PetscFunctionBegin;
  if (baij->donotstash) {
    PetscFunctionReturn(0);
  }

  /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
  ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
  if (addv == (ADD_VALUES|INSERT_VALUES)) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
  }
  mat->insertmode = addv; /* in case this processor had no cache */

  ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
  ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
  ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
  PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       temporarily clear the roworiented flags, and restore the original flags
       after the MatSetValues() calls */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselves, in order that we may reassemble. */
  /*
     if the nonzero structure of submatrix B cannot change then we know that
     no processor disassembled, thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}
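
/*
   Usage sketch (illustrative): assembly is a two-phase collective operation.
   Values destined for rows owned by other processes are stashed during
   MatSetValues() calls, shipped in MatAssemblyBegin(), and drained into the
   local A/B parts by the routine above. A typical caller does

     ... MatSetValues()/MatSetValuesBlocked() calls, possibly for
         rows owned by other processes ...
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   after which the matrix is ready for MatMult() and friends; the first final
   assembly also builds the scatter contexts via MatSetUpMultiply_MPISBAIJ().
*/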

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    PetscDraw  draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat          A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ  *Bloc;
    PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar    *a;

    /* Should this be the same type as mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ"
PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
{
  PetscErrorCode ierr;
  PetscTruth     iascii,isdraw,issocket,isbinary;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
  if (iascii || isdraw || issocket || isbinary) {
    ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
  } else {
    SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ"
PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt,mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  /* diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
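
/*
   A worked note on the scheme above: only the upper block triangle is stored,
   with A holding the diagonal block and B the coupling to columns owned by
   later processes. Writing x = [x_loc; x_ghost], the product is assembled as

     y = A*x_loc                (symmetric diagonal block, into slvec1a)
       + B*x_ghost              (the final multadd, using slvec1b)
       + contributions of B^T*x_loc scattered to the owning processes

   slvec0 carries [x_loc; B^T*x_loc] forward through sMvctx with ADD_VALUES, so
   a single communication round delivers both the ghost values of x and the
   transpose contributions.
*/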

#undef __FUNCT__
#define __FUNCT__ "MatMult_MPISBAIJ_2comm"
PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       nt;

  PetscFunctionBegin;
  ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
  if (nt != A->n) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
  }
  ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
  if (nt != A->m) {
    SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
  }

  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /*
  PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
  PetscSynchronizedFlush(A->comm);
  */
  /* diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
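
/*
   Both transpose operations above simply delegate to the non-transposed
   kernels: an SBAIJ matrix is symmetric by definition of the format, so
   A^T x = A x and no separate implementation is needed.
*/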

/*
  This only works correctly for square matrices where the subblock A->A is the
   diagonal block
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
  ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatScale_MPISBAIJ"
PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatScale(aa,a->A);CHKERRQ(ierr);
  ierr = MatScale(aa,a->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  PetscFunctionBegin;
  SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatRestoreRow_MPISBAIJ"
PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;

  PetscFunctionBegin;
  if (!baij->getrowactive) {
    SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
  }
  baij->getrowactive = PETSC_FALSE;
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatZeroEntries_MPISBAIJ"
PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
  ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetInfo_MPISBAIJ"
PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
  Mat            A = a->A,B = a->B;
  PetscErrorCode ierr;
  PetscReal      isend[5],irecv[5];

  PetscFunctionBegin;
  info->block_size     = (PetscReal)matin->bs;
  ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
  isend[3] = info->memory;  isend[4] = info->mallocs;
  ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
  isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
  isend[3] += info->memory;  isend[4] += info->mallocs;
  if (flag == MAT_LOCAL) {
    info->nz_used      = isend[0];
    info->nz_allocated = isend[1];
    info->nz_unneeded  = isend[2];
    info->memory       = isend[3];
    info->mallocs      = isend[4];
  } else if (flag == MAT_GLOBAL_MAX) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else if (flag == MAT_GLOBAL_SUM) {
    ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
    info->nz_used      = irecv[0];
    info->nz_allocated = irecv[1];
    info->nz_unneeded  = irecv[2];
    info->memory       = irecv[3];
    info->mallocs      = irecv[4];
  } else {
    SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
  }
  info->rows_global       = (PetscReal)matin->M;
  info->columns_global    = (PetscReal)matin->N;
  info->rows_local        = (PetscReal)matin->m;
  info->columns_local     = (PetscReal)matin->N;
  info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
  info->fill_ratio_needed = 0;
  info->factor_mallocs    = 0;
  PetscFunctionReturn(0);
}
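
/*
   Usage sketch (illustrative): querying assembled-matrix statistics reduced
   over all processes:

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(mat->comm,"nz used %g, mallocs %g\n",info.nz_used,info.mallocs);CHKERRQ(ierr);

   MAT_LOCAL returns this process' share and MAT_GLOBAL_MAX the per-process
   maximum, as handled by the branches above.
*/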

#undef __FUNCT__
#define __FUNCT__ "MatSetOption_MPISBAIJ"
PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  switch (op) {
  case MAT_NO_NEW_NONZERO_LOCATIONS:
  case MAT_YES_NEW_NONZERO_LOCATIONS:
  case MAT_COLUMNS_UNSORTED:
  case MAT_COLUMNS_SORTED:
  case MAT_NEW_NONZERO_ALLOCATION_ERR:
  case MAT_KEEP_ZEROED_ROWS:
  case MAT_NEW_NONZERO_LOCATION_ERR:
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROW_ORIENTED:
    a->roworiented = PETSC_TRUE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_ROWS_SORTED:
  case MAT_ROWS_UNSORTED:
  case MAT_YES_NEW_DIAGONALS:
    PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
    break;
  case MAT_COLUMN_ORIENTED:
    a->roworiented = PETSC_FALSE;
    ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
    ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
    break;
  case MAT_IGNORE_OFF_PROC_ENTRIES:
    a->donotstash = PETSC_TRUE;
    break;
  case MAT_NO_NEW_DIAGONALS:
    SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
  case MAT_USE_HASH_TABLE:
    a->ht_flag = PETSC_TRUE;
    break;
  case MAT_NOT_SYMMETRIC:
  case MAT_NOT_STRUCTURALLY_SYMMETRIC:
  case MAT_HERMITIAN:
    SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
  case MAT_SYMMETRIC:
  case MAT_STRUCTURALLY_SYMMETRIC:
  case MAT_NOT_HERMITIAN:
  case MAT_SYMMETRY_ETERNAL:
  case MAT_NOT_SYMMETRY_ETERNAL:
    break;
  default:
    SETERRQ(PETSC_ERR_SUP,"unknown option");
  }
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  if (ll != rr) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
  }
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    /* left scale the off-diagonal block while the scatter proceeds */
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
    /* scale the diagonal block */
    ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
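
/*
   A worked note on the ordering above: symmetric scaling replaces M by
   diag(d)*M*diag(d) (ll == rr is enforced), which splits per process into

     A <- D_loc * A * D_loc      (diagonal block, scaled on both sides)
     B <- D_loc * B * D_ghost    (rows scaled with local entries of d, columns
                                  with the entries scattered into baij->lvec)

   The scatter of rr is started first so the left scaling of B and the scaling
   of A overlap the communication needed for the right scaling of B.
*/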

#undef __FUNCT__
#define __FUNCT__ "MatPrintHelp_MPISBAIJ"
PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
  MPI_Comm          comm = A->comm;
  static PetscTruth called = PETSC_FALSE;
  PetscErrorCode    ierr;

  PetscFunctionBegin;
  if (!a->rank) {
    ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
  }
  if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
  ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
  ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);

#undef __FUNCT__
#define __FUNCT__ "MatEqual_MPISBAIJ"
PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
{
  Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
  Mat            a,b,c,d;
  PetscTruth     flg;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  a = matA->A; b = matA->B;
  c = matB->A; d = matB->B;

  ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
  if (flg) {
    ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
  }
  ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}

#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
{
  PetscErrorCode ierr;
  PetscInt       i;
  PetscTruth     flg;

  PetscFunctionBegin;
  for (i=0; i<n; i++) {
    ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
    if (!flg) {
      SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
    }
  }
  ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
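
/*
   Caller-side sketch (illustrative): because only the upper triangle is
   stored, a submatrix is only well defined when it is itself symmetric, hence
   the ISEqual() check above. The row and column index sets must match:

     IS  is;
     Mat *submat;
     ierr = ISCreateGeneral(PETSC_COMM_SELF,nidx,idx,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrices(mat,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/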
1332 
1333 
1334 /* -------------------------------------------------------------------*/
1335 static struct _MatOps MatOps_Values = {
1336        MatSetValues_MPISBAIJ,
1337        MatGetRow_MPISBAIJ,
1338        MatRestoreRow_MPISBAIJ,
1339        MatMult_MPISBAIJ,
1340 /* 4*/ MatMultAdd_MPISBAIJ,
1341        MatMultTranspose_MPISBAIJ,
1342        MatMultTransposeAdd_MPISBAIJ,
1343        0,
1344        0,
1345        0,
1346 /*10*/ 0,
1347        0,
1348        0,
1349        MatRelax_MPISBAIJ,
1350        MatTranspose_MPISBAIJ,
1351 /*15*/ MatGetInfo_MPISBAIJ,
1352        MatEqual_MPISBAIJ,
1353        MatGetDiagonal_MPISBAIJ,
1354        MatDiagonalScale_MPISBAIJ,
1355        MatNorm_MPISBAIJ,
1356 /*20*/ MatAssemblyBegin_MPISBAIJ,
1357        MatAssemblyEnd_MPISBAIJ,
1358        0,
1359        MatSetOption_MPISBAIJ,
1360        MatZeroEntries_MPISBAIJ,
1361 /*25*/ 0,
1362        0,
1363        0,
1364        0,
1365        0,
1366 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1367        0,
1368        0,
1369        0,
1370        0,
1371 /*35*/ MatDuplicate_MPISBAIJ,
1372        0,
1373        0,
1374        0,
1375        0,
1376 /*40*/ 0,
1377        MatGetSubMatrices_MPISBAIJ,
1378        MatIncreaseOverlap_MPISBAIJ,
1379        MatGetValues_MPISBAIJ,
1380        0,
1381 /*45*/ MatPrintHelp_MPISBAIJ,
1382        MatScale_MPISBAIJ,
1383        0,
1384        0,
1385        0,
1386 /*50*/ 0,
1387        0,
1388        0,
1389        0,
1390        0,
1391 /*55*/ 0,
1392        0,
1393        MatSetUnfactored_MPISBAIJ,
1394        0,
1395        MatSetValuesBlocked_MPISBAIJ,
1396 /*60*/ 0,
1397        0,
1398        0,
1399        MatGetPetscMaps_Petsc,
1400        0,
1401 /*65*/ 0,
1402        0,
1403        0,
1404        0,
1405        0,
1406 /*70*/ MatGetRowMax_MPISBAIJ,
1407        0,
1408        0,
1409        0,
1410        0,
1411 /*75*/ 0,
1412        0,
1413        0,
1414        0,
1415        0,
1416 /*80*/ 0,
1417        0,
1418        0,
1419        0,
1420        MatLoad_MPISBAIJ,
1421 /*85*/ 0,
1422        0,
1423        0,
1424        0,
1425        0,
1426 /*90*/ 0,
1427        0,
1428        0,
1429        0,
1430        0,
1431 /*95*/ 0,
1432        0,
1433        0,
1434        0};
1435 
1436 
1437 EXTERN_C_BEGIN
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1440 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1441 {
1442   PetscFunctionBegin;
1443   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1444   *iscopy = PETSC_FALSE;
1445   PetscFunctionReturn(0);
1446 }
1447 EXTERN_C_END
1448 
1449 EXTERN_C_BEGIN
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1452 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1453 {
1454   Mat_MPISBAIJ   *b;
1455   PetscErrorCode ierr;
1456   PetscInt       i,mbs,Mbs;
1457 
1458   PetscFunctionBegin;
1459   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1460 
1461   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1462   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1463   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1464   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1465   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1466   if (d_nnz) {
1467     for (i=0; i<B->m/bs; i++) {
1468       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1469     }
1470   }
1471   if (o_nnz) {
1472     for (i=0; i<B->m/bs; i++) {
1473       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1474     }
1475   }
1476   B->preallocated = PETSC_TRUE;
1477   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1478   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1479   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1480   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1481 
1482   b   = (Mat_MPISBAIJ*)B->data;
1483   mbs = B->m/bs;
1484   Mbs = B->M/bs;
1485   if (mbs*bs != B->m) {
1486     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1487   }
1488 
1489   B->bs  = bs;
1490   b->bs2 = bs*bs;
1491   b->mbs = mbs;
1492   b->nbs = mbs;
1493   b->Mbs = Mbs;
1494   b->Nbs = Mbs;
1495 
1496   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1497   b->rowners[0]    = 0;
1498   for (i=2; i<=b->size; i++) {
1499     b->rowners[i] += b->rowners[i-1];
1500   }
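  /* illustrative trace: with b->size==3 and local mbs of 4,3,3, the gather above
     fills rowners = {0,4,3,3} and this prefix sum turns it into {0,4,7,10},
     giving the block-row ownership ranges used for rstart/rend below */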
1501   b->rstart    = b->rowners[b->rank];
1502   b->rend      = b->rowners[b->rank+1];
1503   b->cstart    = b->rstart;
1504   b->cend      = b->rend;
1505   for (i=0; i<=b->size; i++) {
1506     b->rowners_bs[i] = b->rowners[i]*bs;
1507   }
1508   b->rstart_bs = b->rstart*bs;
1509   b->rend_bs   = b->rend*bs;
1510 
1511   b->cstart_bs = b->cstart*bs;
1512   b->cend_bs   = b->cend*bs;
1513 
1514   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1515   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1516   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1517   PetscLogObjectParent(B,b->A);
1518 
1519   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1520   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1521   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1522   PetscLogObjectParent(B,b->B);
1523 
1524   /* build cache for off array entries formed */
1525   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1526 
1527   PetscFunctionReturn(0);
1528 }
1529 EXTERN_C_END
1530 
1531 /*MC
1532    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1533    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1534 
1535    Options Database Keys:
1536 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1537 
1538   Level: beginner
1539 
1540 .seealso: MatCreateMPISBAIJ
1541 M*/
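
/* A minimal usage sketch (illustrative only; error checking abbreviated, all sizes assumed):

      Mat A;
      ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
      ... MatSetValues()/MatAssemblyBegin()/MatAssemblyEnd() ...
*/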
1542 
1543 EXTERN_C_BEGIN
1544 #undef __FUNCT__
1545 #define __FUNCT__ "MatCreate_MPISBAIJ"
1546 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1547 {
1548   Mat_MPISBAIJ   *b;
1549   PetscErrorCode ierr;
1550   PetscTruth     flg;
1551 
1552   PetscFunctionBegin;
1553 
1554   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1555   B->data = (void*)b;
1556   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1557 
1558   B->ops->destroy    = MatDestroy_MPISBAIJ;
1559   B->ops->view       = MatView_MPISBAIJ;
1560   B->mapping    = 0;
1561   B->factor     = 0;
1562   B->assembled  = PETSC_FALSE;
1563 
1564   B->insertmode = NOT_SET_VALUES;
1565   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1566   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1567 
1568   /* build local table of row and column ownerships */
1569   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1570   b->cowners    = b->rowners + b->size + 2;
1571   b->rowners_bs = b->cowners + b->size + 2;
1572   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1573 
1574   /* build cache for off array entries formed */
1575   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1576   b->donotstash  = PETSC_FALSE;
1577   b->colmap      = PETSC_NULL;
1578   b->garray      = PETSC_NULL;
1579   b->roworiented = PETSC_TRUE;
1580 
1581 #if defined(PETSC_USE_MAT_SINGLE)
1582   /* stuff for MatSetValues_XXX in single precision */
1583   b->setvalueslen     = 0;
1584   b->setvaluescopy    = PETSC_NULL;
1585 #endif
1586 
1587   /* stuff used in block assembly */
1588   b->barray       = 0;
1589 
1590   /* stuff used for matrix vector multiply */
1591   b->lvec         = 0;
1592   b->Mvctx        = 0;
1593   b->slvec0       = 0;
1594   b->slvec0b      = 0;
1595   b->slvec1       = 0;
1596   b->slvec1a      = 0;
1597   b->slvec1b      = 0;
1598   b->sMvctx       = 0;
1599 
1600   /* stuff for MatGetRow() */
1601   b->rowindices   = 0;
1602   b->rowvalues    = 0;
1603   b->getrowactive = PETSC_FALSE;
1604 
1605   /* hash table stuff */
1606   b->ht           = 0;
1607   b->hd           = 0;
1608   b->ht_size      = 0;
1609   b->ht_flag      = PETSC_FALSE;
1610   b->ht_fact      = 0;
1611   b->ht_total_ct  = 0;
1612   b->ht_insert_ct = 0;
1613 
1614   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1615   if (flg) {
1616     PetscReal fact = 1.39;
1617     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1618     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1619     if (fact <= 1.0) fact = 1.39;
1620     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1621     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1622   }
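  /* e.g. run with: -mat_use_hash_table 1.6  (a hypothetical command line;
     factors <= 1.0 fall back to the default 1.39 above) */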
1623   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1624                                      "MatStoreValues_MPISBAIJ",
1625                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1626   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1627                                      "MatRetrieveValues_MPISBAIJ",
1628                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1629   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1630                                      "MatGetDiagonalBlock_MPISBAIJ",
1631                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1632   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1633                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1634                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1635   B->symmetric                  = PETSC_TRUE;
1636   B->structurally_symmetric     = PETSC_TRUE;
1637   B->symmetric_set              = PETSC_TRUE;
1638   B->structurally_symmetric_set = PETSC_TRUE;
1639   PetscFunctionReturn(0);
1640 }
1641 EXTERN_C_END
1642 
1643 /*MC
1644    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1645 
1646    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1647    and MATMPISBAIJ otherwise.
1648 
1649    Options Database Keys:
1650 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1651 
1652   Level: beginner
1653 
1654 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1655 M*/
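
/* e.g.: mpiexec -n 4 ./app -mat_type sbaij   (a hypothetical command line; with
   more than one process this dispatches to MATMPISBAIJ, otherwise MATSEQSBAIJ) */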
1656 
1657 EXTERN_C_BEGIN
1658 #undef __FUNCT__
1659 #define __FUNCT__ "MatCreate_SBAIJ"
1660 PetscErrorCode MatCreate_SBAIJ(Mat A)
1661 {
1662   PetscErrorCode ierr;
1663   PetscMPIInt    size;
1664 
1665   PetscFunctionBegin;
1666   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1667   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1668   if (size == 1) {
1669     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1670   } else {
1671     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1672   }
1673   PetscFunctionReturn(0);
1674 }
1675 EXTERN_C_END
1676 
1677 #undef __FUNCT__
1678 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1679 /*@C
1680    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1681    the user should preallocate the matrix storage by setting the parameters
1682    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1683    performance can be increased by more than a factor of 50.
1684 
1685    Collective on Mat
1686 
1687    Input Parameters:
1688 +  A - the matrix
1689 .  bs   - size of block
1690 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1691            submatrix  (same for all local rows)
1692 .  d_nnz - array containing the number of block nonzeros in the various block rows
1693            in the upper triangular and diagonal part of the diagonal portion of the local
1694            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1695            room for the diagonal entry even if it is zero.
1696 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1697            submatrix (same for all local rows).
1698 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1699            off-diagonal portion of the local submatrix (possibly different for
1700            each block row) or PETSC_NULL.
1701 
1702 
1703    Options Database Keys:
1704 .   -mat_no_unroll - uses code that does not unroll the loops in the
1705                      block calculations (much slower)
1706 .   -mat_block_size - size of the blocks to use
1707 
1708    Notes:
1709 
1710    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1711    then it must be used on all processors that share the object for that argument.
1712 
1713    If the *_nnz parameter is given then the *_nz parameter is ignored
1714 
1715    Storage Information:
1716    For a square global matrix we define each processor's diagonal portion
1717    to be its local rows and the corresponding columns (a square submatrix);
1718    each processor's off-diagonal portion encompasses the remainder of the
1719    local matrix (a rectangular submatrix).
1720 
1721    The user can specify preallocated storage for the diagonal part of
1722    the local submatrix with either d_nz or d_nnz (not both).  Set
1723    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1724    memory allocation.  Likewise, specify preallocated storage for the
1725    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1726 
1727    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1728    the figure below we depict these three local rows and all columns (0-11).
1729 
1730 .vb
1731            0 1 2 3 4 5 6 7 8 9 10 11
1732           -------------------
1733    row 3  |  o o o d d d o o o o o o
1734    row 4  |  o o o d d d o o o o o o
1735    row 5  |  o o o d d d o o o o o o
1736           -------------------
1737 .ve
1738 
1739    Thus, any entries in the d locations are stored in the d (diagonal)
1740    submatrix, and any entries in the o locations are stored in the
1741    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1742    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1743 
1744    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1745    plus the diagonal part of the d matrix,
1746    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1747    In general, for PDE problems in which most nonzeros are near the diagonal,
1748    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1749    or you will get TERRIBLE performance; see the users' manual chapter on
1750    matrices.
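
   Example (an illustrative sketch for the 12x12, bs=1 layout above; the counts
   are assumptions that count only the stored upper triangle):
.vb
      PetscInt d_nnz[] = {3,2,1};   /* upper-triangular part of the d block, rows 3,4,5 */
      PetscInt o_nnz[] = {6,6,6};   /* stored columns to the right of the d block */
      ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve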
1751 
1752    Level: intermediate
1753 
1754 .keywords: matrix, block, aij, compressed row, sparse, parallel
1755 
1756 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1757 @*/
1758 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1759 {
1760   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1761 
1762   PetscFunctionBegin;
1763   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1764   if (f) {
1765     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1766   }
1767   PetscFunctionReturn(0);
1768 }
1769 
1770 #undef __FUNCT__
1771 #define __FUNCT__ "MatCreateMPISBAIJ"
1772 /*@C
1773    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1774    (block compressed row).  For good matrix assembly performance
1775    the user should preallocate the matrix storage by setting the parameters
1776    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1777    performance can be increased by more than a factor of 50.
1778 
1779    Collective on MPI_Comm
1780 
1781    Input Parameters:
1782 +  comm - MPI communicator
1783 .  bs   - size of block
1784 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1785            This value should be the same as the local size used in creating the
1786            y vector for the matrix-vector product y = Ax.
1787 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1788            This value should be the same as the local size used in creating the
1789            x vector for the matrix-vector product y = Ax.
1790 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1791 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1792 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1793            submatrix  (same for all local rows)
1794 .  d_nnz - array containing the number of block nonzeros in the various block rows
1795            in the upper triangular portion of the diagonal portion of the local
1796            submatrix (possibly different for each block row) or PETSC_NULL.
1797            You must leave room for the diagonal entry even if it is zero.
1798 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1799            submatrix (same for all local rows).
1800 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1801            off-diagonal portion of the local submatrix (possibly different for
1802            each block row) or PETSC_NULL.
1803 
1804    Output Parameter:
1805 .  A - the matrix
1806 
1807    Options Database Keys:
1808 .   -mat_no_unroll - uses code that does not unroll the loops in the
1809                      block calculations (much slower)
1810 .   -mat_block_size - size of the blocks to use
1811 .   -mat_mpi - use the parallel matrix data structures even on one processor
1812                (defaults to using SeqSBAIJ format on one processor)
1813 
1814    Notes:
1815    The user MUST specify either the local or global matrix dimensions
1816    (possibly both).
1817 
1818    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1819    then it must be used on all processors that share the object for that argument.
1820 
1821    If the *_nnz parameter is given then the *_nz parameter is ignored
1822 
1823    Storage Information:
1824    For a square global matrix we define each processor's diagonal portion
1825    to be its local rows and the corresponding columns (a square submatrix);
1826    each processor's off-diagonal portion encompasses the remainder of the
1827    local matrix (a rectangular submatrix).
1828 
1829    The user can specify preallocated storage for the diagonal part of
1830    the local submatrix with either d_nz or d_nnz (not both).  Set
1831    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1832    memory allocation.  Likewise, specify preallocated storage for the
1833    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1834 
1835    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1836    the figure below we depict these three local rows and all columns (0-11).
1837 
1838 .vb
1839            0 1 2 3 4 5 6 7 8 9 10 11
1840           -------------------
1841    row 3  |  o o o d d d o o o o o o
1842    row 4  |  o o o d d d o o o o o o
1843    row 5  |  o o o d d d o o o o o o
1844           -------------------
1845 .ve
1846 
1847    Thus, any entries in the d locations are stored in the d (diagonal)
1848    submatrix, and any entries in the o locations are stored in the
1849    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1850    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1851 
1852    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1853    plus the diagonal part of the d matrix,
1854    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1855    In general, for PDE problems in which most nonzeros are near the diagonal,
1856    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1857    or you will get TERRIBLE performance; see the users' manual chapter on
1858    matrices.
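
   Example (an illustrative sketch; all sizes and counts here are assumptions):
.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                               3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve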
1859 
1860    Level: intermediate
1861 
1862 .keywords: matrix, block, aij, compressed row, sparse, parallel
1863 
1864 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1865 @*/
1866 
1867 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1868 {
1869   PetscErrorCode ierr;
1870   PetscMPIInt    size;
1871 
1872   PetscFunctionBegin;
1873   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1874   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1875   if (size > 1) {
1876     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1877     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1878   } else {
1879     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1880     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1881   }
1882   PetscFunctionReturn(0);
1883 }
1884 
1885 
1886 #undef __FUNCT__
1887 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1888 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1889 {
1890   Mat            mat;
1891   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1892   PetscErrorCode ierr;
1893   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1894   PetscScalar    *array;
1895 
1896   PetscFunctionBegin;
1897   *newmat       = 0;
1898   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1899   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1900   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1901 
1902   mat->factor       = matin->factor;
1903   mat->preallocated = PETSC_TRUE;
1904   mat->assembled    = PETSC_TRUE;
1905   mat->insertmode   = NOT_SET_VALUES;
1906 
1907   a = (Mat_MPISBAIJ*)mat->data;
1908   mat->bs  = matin->bs;
1909   a->bs2   = oldmat->bs2;
1910   a->mbs   = oldmat->mbs;
1911   a->nbs   = oldmat->nbs;
1912   a->Mbs   = oldmat->Mbs;
1913   a->Nbs   = oldmat->Nbs;
1914 
1915   a->rstart       = oldmat->rstart;
1916   a->rend         = oldmat->rend;
1917   a->cstart       = oldmat->cstart;
1918   a->cend         = oldmat->cend;
1919   a->size         = oldmat->size;
1920   a->rank         = oldmat->rank;
1921   a->donotstash   = oldmat->donotstash;
1922   a->roworiented  = oldmat->roworiented;
1923   a->rowindices   = 0;
1924   a->rowvalues    = 0;
1925   a->getrowactive = PETSC_FALSE;
1926   a->barray       = 0;
1927   a->rstart_bs    = oldmat->rstart_bs;
1928   a->rend_bs      = oldmat->rend_bs;
1929   a->cstart_bs    = oldmat->cstart_bs;
1930   a->cend_bs      = oldmat->cend_bs;
1931 
1932   /* hash table stuff */
1933   a->ht           = 0;
1934   a->hd           = 0;
1935   a->ht_size      = 0;
1936   a->ht_flag      = oldmat->ht_flag;
1937   a->ht_fact      = oldmat->ht_fact;
1938   a->ht_total_ct  = 0;
1939   a->ht_insert_ct = 0;
1940 
1941   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1942   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1943   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1944   if (oldmat->colmap) {
1945 #if defined (PETSC_USE_CTABLE)
1946     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1947 #else
1948     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1949     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
1950     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1951 #endif
1952   } else a->colmap = 0;
1953 
1954   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1955     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1956     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1957     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1958   } else a->garray = 0;
1959 
1960   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1961   PetscLogObjectParent(mat,a->lvec);
1962   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1963   PetscLogObjectParent(mat,a->Mvctx);
1964 
1965   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1966   PetscLogObjectParent(mat,a->slvec0);
1967   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1968   PetscLogObjectParent(mat,a->slvec1);
1969 
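  /* layout sketch: slvec1's array is [local part of length bs*mbs | ghost part];
     slvec1a and slvec1b below are sequential vectors that share those two
     segments, and slvec0b likewise shares the ghost segment of slvec0 */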
1970   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1971   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1972   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1973   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1974   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1975   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1976   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1977   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1978   PetscLogObjectParent(mat,a->slvec0);
1979   PetscLogObjectParent(mat,a->slvec1);
1980   PetscLogObjectParent(mat,a->slvec0b);
1981   PetscLogObjectParent(mat,a->slvec1a);
1982   PetscLogObjectParent(mat,a->slvec1b);
1983 
1984   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
1985   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1986   a->sMvctx = oldmat->sMvctx;
1987   PetscLogObjectParent(mat,a->sMvctx);
1988 
1989   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1990   PetscLogObjectParent(mat,a->A);
1991   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1992   PetscLogObjectParent(mat,a->B);
1993   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1994   *newmat = mat;
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #include "petscsys.h"
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatLoad_MPISBAIJ"
2002 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2003 {
2004   Mat            A;
2005   PetscErrorCode ierr;
2006   PetscInt       i,nz,j,rstart,rend;
2007   PetscScalar    *vals,*buf;
2008   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2009   MPI_Status     status;
2010   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2011   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2012   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2013   PetscInt       bs=1,Mbs,mbs,extra_rows;
2014   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2015   PetscInt       dcount,kmax,k,nzcount,tmp;
2016   int            fd;
2017 
2018   PetscFunctionBegin;
2019   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2020 
2021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2022   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2023   if (!rank) {
2024     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2025     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2026     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2027     if (header[3] < 0) {
2028       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2029     }
2030   }
2031 
2032   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2033   M = header[1]; N = header[2];
2034 
2035   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2036 
2037   /*
2038      This code adds extra rows to make sure the number of rows is
2039      divisible by the blocksize
2040   */
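  /* illustrative trace: M=10, bs=3 gives Mbs=3 and extra_rows = 3-10+9 = 2,
     so Mbs becomes 4 and the loaded matrix is padded to 12 rows */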
2041   Mbs        = M/bs;
2042   extra_rows = bs - M + bs*(Mbs);
2043   if (extra_rows == bs) extra_rows = 0;
2044   else                  Mbs++;
2045   if (extra_rows && !rank) {
2046     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2047   }
2048 
2049   /* determine ownership of all rows */
2050   mbs        = Mbs/size + ((Mbs % size) > rank);
2051   m          = mbs*bs;
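  /* illustrative trace: Mbs=10 on size=3 processes gives mbs = 4,3,3 on ranks 0,1,2 */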
2052   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2053   browners   = rowners + size + 1;
2054   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2055   rowners[0] = 0;
2056   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2057   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2058   rstart = rowners[rank];
2059   rend   = rowners[rank+1];
2060 
2061   /* distribute row lengths to all processors */
2062   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2063   if (!rank) {
2064     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2065     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2066     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2067     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2068     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2069     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2070     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2071   } else {
2072     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2073   }
2074 
2075   if (!rank) {   /* procs[0] */
2076     /* calculate the number of nonzeros on each processor */
2077     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2078     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2079     for (i=0; i<size; i++) {
2080       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2081         procsnz[i] += rowlengths[j];
2082       }
2083     }
2084     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2085 
2086     /* determine max buffer needed and allocate it */
2087     maxnz = 0;
2088     for (i=0; i<size; i++) {
2089       maxnz = PetscMax(maxnz,procsnz[i]);
2090     }
2091     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2092 
2093     /* read in my part of the matrix column indices  */
2094     nz     = procsnz[0];
2095     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2096     mycols = ibuf;
2097     if (size == 1)  nz -= extra_rows;
2098     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2099     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2100 
2101     /* read in everyone else's (except the last proc's) and ship off */
2102     for (i=1; i<size-1; i++) {
2103       nz   = procsnz[i];
2104       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2105       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2106     }
2107     /* read in the stuff for the last proc */
2108     if (size != 1) {
2109       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2110       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2111       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2112       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2113     }
2114     ierr = PetscFree(cols);CHKERRQ(ierr);
2115   } else {  /* procs[i], i>0 */
2116     /* determine buffer space needed for message */
2117     nz = 0;
2118     for (i=0; i<m; i++) {
2119       nz += locrowlens[i];
2120     }
2121     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2122     mycols = ibuf;
2123     /* receive message of column indices */
2124     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2125     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2126     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2127   }
2128 
2129   /* loop over local rows, determining number of off diagonal entries */
2130   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2131   odlens   = dlens + (rend-rstart);
2132   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2133   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2134   masked1  = mask    + Mbs;
2135   masked2  = masked1 + Mbs;
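  /* counting scheme (sketch): mask[] flags block columns already seen in the
     current block row; masked1[]/masked2[] remember which diagonal/off-diagonal
     flags were set so that only those entries of mask[] are cleared afterwards */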
2136   rowcount = 0; nzcount = 0;
2137   for (i=0; i<mbs; i++) {
2138     dcount  = 0;
2139     odcount = 0;
2140     for (j=0; j<bs; j++) {
2141       kmax = locrowlens[rowcount];
2142       for (k=0; k<kmax; k++) {
2143         tmp = mycols[nzcount++]/bs; /* block col. index */
2144         if (!mask[tmp]) {
2145           mask[tmp] = 1;
2146           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2147           else masked1[dcount++] = tmp; /* entry in diag portion */
2148         }
2149       }
2150       rowcount++;
2151     }
2152 
2153     dlens[i]  = dcount;  /* d_nzz[i] */
2154     odlens[i] = odcount; /* o_nzz[i] */
2155 
2156     /* zero out the mask elements we set */
2157     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2158     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2159   }
2160 
2161   /* create our matrix */
2162   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2163   ierr = MatSetType(A,type);CHKERRQ(ierr);
2164   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2165   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2166 
2167   if (!rank) {
2168     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2169     /* read in my part of the matrix numerical values  */
2170     nz = procsnz[0];
2171     vals = buf;
2172     mycols = ibuf;
2173     if (size == 1)  nz -= extra_rows;
2174     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2175     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2176 
2177     /* insert into matrix */
2178     jj      = rstart*bs;
2179     for (i=0; i<m; i++) {
2180       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2181       mycols += locrowlens[i];
2182       vals   += locrowlens[i];
2183       jj++;
2184     }
2185 
2186     /* read in other processors (except the last one) and ship out */
2187     for (i=1; i<size-1; i++) {
2188       nz   = procsnz[i];
2189       vals = buf;
2190       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2191       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2192     }
2193     /* the last proc */
2194     if (size != 1){
2195       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 after the loop above; the extra rows are not on the disk */
2196       vals = buf;
2197       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2198       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2199       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2200     }
2201     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2202 
2203   } else {
2204     /* receive numeric values */
2205     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2206 
2207     /* receive message of values */
2208     vals   = buf;
2209     mycols = ibuf;
2210     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2211     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2212     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2213 
2214     /* insert into matrix */
2215     jj      = rstart*bs;
2216     for (i=0; i<m; i++) {
2217       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2218       mycols += locrowlens[i];
2219       vals   += locrowlens[i];
2220       jj++;
2221     }
2222   }
2223 
2224   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2225   ierr = PetscFree(buf);CHKERRQ(ierr);
2226   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2227   ierr = PetscFree(rowners);CHKERRQ(ierr);
2228   ierr = PetscFree(dlens);CHKERRQ(ierr);
2229   ierr = PetscFree(mask);CHKERRQ(ierr);
2230   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2231   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2232   *newmat = A;
2233   PetscFunctionReturn(0);
2234 }
2235 
2236 #undef __FUNCT__
2237 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2238 /*XXXXX@
2239    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2240 
2241    Input Parameters:
2242 .  mat  - the matrix
2243 .  fact - factor
2244 
2245    Collective on Mat
2246 
2247    Level: advanced
2248 
2249   Notes:
2250    This can also be set by the command line option: -mat_use_hash_table fact
2251 
2252 .keywords: matrix, hashtable, factor, HT
2253 
2254 .seealso: MatSetOption()
2255 @XXXXX*/
2256 
2257 
2258 #undef __FUNCT__
2259 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2260 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2261 {
2262   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2263   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2264   PetscReal      atmp;
2265   PetscReal      *work,*svalues,*rvalues;
2266   PetscErrorCode ierr;
2267   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2268   PetscMPIInt    rank,size;
2269   PetscInt       *rowners_bs,dest,count,source;
2270   PetscScalar    *va;
2271   MatScalar      *ba;
2272   MPI_Status     stat;
2273 
2274   PetscFunctionBegin;
2275   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2276   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2277 
2278   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2279   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2280 
2281   bs   = A->bs;
2282   mbs  = a->mbs;
2283   Mbs  = a->Mbs;
2284   ba   = b->a;
2285   bi   = b->i;
2286   bj   = b->j;
2287 
2288   /* find ownerships */
2289   rowners_bs = a->rowners_bs;
2290 
2291   /* each proc creates an array to be distributed */
2292   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2293   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2294 
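  /* scheme (sketch): only the upper triangle is stored, so an off-diagonal entry
     in local row r and global column c also bounds the row maximum of the process
     owning c; work[] accumulates those remote contributions, which are then sent
     downstream and folded in on receipt */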
2295   /* row_max for B */
2296   if (rank != size-1){
2297     for (i=0; i<mbs; i++) {
2298       ncols = bi[1] - bi[0]; bi++;
2299       brow  = bs*i;
2300       for (j=0; j<ncols; j++){
2301         bcol = bs*(*bj);
2302         for (kcol=0; kcol<bs; kcol++){
2303           col = bcol + kcol;                 /* local col index */
2304           col += rowners_bs[rank+1];      /* global col index */
2305           for (krow=0; krow<bs; krow++){
2306             atmp = PetscAbsScalar(*ba); ba++;
2307             row = brow + krow;    /* local row index */
2308             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2309             if (work[col] < atmp) work[col] = atmp;
2310           }
2311         }
2312         bj++;
2313       }
2314     }
2315 
2316     /* send values to its owners */
2317     for (dest=rank+1; dest<size; dest++){
2318       svalues = work + rowners_bs[dest];
2319       count   = rowners_bs[dest+1]-rowners_bs[dest];
2320       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2321     }
2322   }
2323 
2324   /* receive values */
2325   if (rank){
2326     rvalues = work;
2327     count   = rowners_bs[rank+1]-rowners_bs[rank];
2328     for (source=0; source<rank; source++){
2329       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2330       /* process values */
2331       for (i=0; i<count; i++){
2332         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2333       }
2334     }
2335   }
2336 
2337   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2338   ierr = PetscFree(work);CHKERRQ(ierr);
2339   PetscFunctionReturn(0);
2340 }
2341 
2342 #undef __FUNCT__
2343 #define __FUNCT__ "MatRelax_MPISBAIJ"
2344 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2345 {
2346   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2347   PetscErrorCode ierr;
2348   PetscInt       mbs=mat->mbs,bs=matin->bs;
2349   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2350   Vec            bb1;
2351 
2352   PetscFunctionBegin;
2353   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2354   if (bs > 1)
2355     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2356 
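  /* sketch of one parallel symmetric sweep (bs==1): with the local rows split as
     [D | B], D the stored diagonal block and B the stored upper off-diagonal part,
     each iteration forms bb1 = bb - B*x_ghost - (remote contributions of B^T*x,
     delivered by the slvec0 -> slvec1 scatter) and then applies a symmetric SOR
     sweep with D to update xx */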
2357   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2358     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2359       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2360       its--;
2361     }
2362 
2363     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2364     while (its--){
2365 
2366       /* lower triangular part: slvec0b = - B^T*xx */
2367       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2368 
2369       /* copy xx into slvec0a */
2370       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2371       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2372       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2373       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2374 
2375       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2376 
2377       /* copy bb into slvec1a */
2378       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2379       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2380       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2381       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2382 
2383       /* set slvec1b = 0 */
2384       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2385 
2386       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2387       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2388       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2389       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2390 
2391       /* upper triangular part: bb1 = slvec1a + B*slvec1b, i.e. bb minus the upper and lower triangular couplings */
2392       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2393 
2394       /* local diagonal sweep */
2395       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2396     }
2397     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2398   } else {
2399     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2400   }
2401   PetscFunctionReturn(0);
2402 }
2403 
2404 #undef __FUNCT__
2405 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2406 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2407 {
2408   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2409   PetscErrorCode ierr;
2410   PetscScalar    mone=-1.0;
2411   Vec            lvec1,bb1;
2412 
2413   PetscFunctionBegin;
2414   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2415   if (matin->bs > 1)
2416     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2417 
2418   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2419     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2420       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2421       its--;
2422     }
2423 
2424     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2425     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2426     while (its--){
2427       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2428 
2429       /* lower diagonal part: bb1 = bb - B^T*xx */
2430       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2431       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2432 
2433       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2434       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2435       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2436 
2437       /* upper diagonal part: bb1 = bb1 - B*x */
2438       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2439       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2440 
2441       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2442 
2443       /* diagonal sweep */
2444       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2445     }
2446     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2447     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2448   } else {
2449     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454