xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 5cb3ce70dc39b03a376c290c5f024f592c402eae)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
/*  UGLY, ugly, ugly
   When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
   not exist.  Otherwise the ..._MatScalar() routines take matrix elements already in
   single precision (MatScalar) and insert them into the single precision data structures.
   The wrapper MatSetValuesBlocked_MPISBAIJ() converts the incoming PetscScalar entries
   into single precision and then calls the corresponding ..._MatScalar() routine to put
   them into the single precision data structures.
*/
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
/* Insert or add one scalar entry (value) at local position (row,col) of the
   diagonal block A.  Performs a binary+linear search of the block row; if the
   block is not present and insertion is allowed, the row is grown by CHUNKSIZE
   block slots (reallocating the i/j/a arrays as a single malloc).
   Relies on the surrounding scope declaring: a, ai, aj, aa, aimax, ailen, bs,
   bs2, rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i, ii, nrow, rmax,
   N, ierr and the enclosing matrix A.
   FIX: the PetscMemzero of the newly inserted chunk previously used
   sizeof(PetscScalar); the buffer holds MatScalar, so under
   PETSC_USE_MAT_SINGLE the old code zeroed twice the intended bytes and
   overran into adjacent storage.  Now uses sizeof(MatScalar), matching the
   companion B macro below. */
#define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
{ \
 \
    brow = row/bs;  \
    rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
    rmax = aimax[brow]; nrow = ailen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto a_noinsert; \
        } \
      } \
      if (a->nonew == 1) goto a_noinsert; \
      else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
        for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
        ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
                    aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(a->a);CHKERRQ(ierr);  \
        if (!a->singlemalloc) { \
          ierr = PetscFree(a->i);CHKERRQ(ierr); \
          ierr = PetscFree(a->j);CHKERRQ(ierr);\
        } \
        aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
        a->singlemalloc = PETSC_TRUE; \
 \
        rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
        rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        a->maxnz += bs2*CHUNKSIZE; \
        a->reallocs++; \
        a->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      a_noinsert:; \
    ailen[brow] = nrow; \
}
/* MatSetValues_SeqSBAIJ_B_Private: insert or add one scalar entry (value) at
   local position (row,col) of the off-diagonal block B.  Mirror image of
   MatSetValues_SeqSBAIJ_A_Private above, operating on b/bi/bj/ba/bimax/bilen
   and using the b_noinsert label.  Relies on the same surrounding-scope
   variables (rp, ap, bap, brow, bcol, ridx, cidx, low, high, t, _i, ii,
   nrow, rmax, N, ierr, bs, bs2) and the enclosing matrix B.
   NOTE(review): the guard below tests MatSetValues_SeqBAIJ_B_Private (BAIJ)
   while the macro being defined is ...SeqSBAIJ_B_Private — confirm the guard
   name is intentional. */
#ifndef MatSetValues_SeqBAIJ_B_Private
#define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
{ \
    brow = row/bs;  \
    rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
    rmax = bimax[brow]; nrow = bilen[brow]; \
      bcol = col/bs; \
      ridx = row % bs; cidx = col % bs; \
      low = 0; high = nrow; \
      while (high-low > 3) { \
        t = (low+high)/2; \
        if (rp[t] > bcol) high = t; \
        else              low  = t; \
      } \
      for (_i=low; _i<high; _i++) { \
        if (rp[_i] > bcol) break; \
        if (rp[_i] == bcol) { \
          bap  = ap +  bs2*_i + bs*cidx + ridx; \
          if (addv == ADD_VALUES) *bap += value;  \
          else                    *bap  = value;  \
          goto b_noinsert; \
        } \
      } \
      if (b->nonew == 1) goto b_noinsert; \
      else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
      if (nrow >= rmax) { \
        /* there is no extra room in row, therefore enlarge */ \
        PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
        MatScalar *new_a; \
 \
        if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
 \
        /* malloc new storage space */ \
        len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
        ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
        new_j = (PetscInt*)(new_a + bs2*new_nz); \
        new_i = new_j + new_nz; \
 \
        /* copy over old data into new slots */ \
        for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
        for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
        ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
        len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
        ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
        ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
                    ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
        /* free up old matrix storage */ \
        ierr = PetscFree(b->a);CHKERRQ(ierr);  \
        if (!b->singlemalloc) { \
          ierr = PetscFree(b->i);CHKERRQ(ierr); \
          ierr = PetscFree(b->j);CHKERRQ(ierr); \
        } \
        ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
        b->singlemalloc = PETSC_TRUE; \
 \
        rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
        rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
        PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
        b->maxnz += bs2*CHUNKSIZE; \
        b->reallocs++; \
        b->nz++; \
      } \
      N = nrow++ - 1;  \
      /* shift up all the later entries in this row */ \
      for (ii=N; ii>=_i; ii--) { \
        rp[ii+1] = rp[ii]; \
        ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
      } \
      if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
      rp[_i]                      = bcol;  \
      ap[bs2*_i + bs*cidx + ridx] = value;  \
      b_noinsert:; \
    bilen[brow] = nrow; \
}
#endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #endif
279 
280 /* Only add/insert a(i,j) with i<=j (blocks).
281    Any a(i,j) with i>j input by user is ingored.
282 */
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
285 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
288   MatScalar      value;
289   PetscTruth     roworiented = baij->roworiented;
290   PetscErrorCode ierr;
291   PetscInt       i,j,row,col;
292   PetscInt       rstart_orig=baij->rstart_bs;
293   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
294   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
295 
296   /* Some Variables required in the macro */
297   Mat            A = baij->A;
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
299   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
300   MatScalar      *aa=a->a;
301 
302   Mat            B = baij->B;
303   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
304   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
305   MatScalar     *ba=b->a;
306 
307   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
308   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
309   MatScalar     *ap,*bap;
310 
311   /* for stash */
312   PetscInt      n_loc, *in_loc=0;
313   MatScalar     *v_loc=0;
314 
315   PetscFunctionBegin;
316 
317   if(!baij->donotstash){
318     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
319     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
320   }
321 
322   for (i=0; i<m; i++) {
323     if (im[i] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
326 #endif
327     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
328       row = im[i] - rstart_orig;              /* local row index */
329       for (j=0; j<n; j++) {
330         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
331         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
332           col = in[j] - cstart_orig;          /* local col index */
333           brow = row/bs; bcol = col/bs;
334           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
335           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
336           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
337           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
338         } else if (in[j] < 0) continue;
339 #if defined(PETSC_USE_DEBUG)
340         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
341 #endif
342         else {  /* off-diag entry (B) */
343           if (mat->was_assembled) {
344             if (!baij->colmap) {
345               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
346             }
347 #if defined (PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
349             col  = col - 1;
350 #else
351             col = baij->colmap[in[j]/bs] - 1;
352 #endif
353             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
354               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
355               col =  in[j];
356               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
357               B = baij->B;
358               b = (Mat_SeqBAIJ*)(B)->data;
359               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
360               ba=b->a;
361             } else col += in[j]%bs;
362           } else col = in[j];
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         }
367       }
368     } else {  /* off processor entry */
369       if (!baij->donotstash) {
370         n_loc = 0;
371         for (j=0; j<n; j++){
372           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
373           in_loc[n_loc] = in[j];
374           if (roworiented) {
375             v_loc[n_loc] = v[i*n+j];
376           } else {
377             v_loc[n_loc] = v[j*m+i];
378           }
379           n_loc++;
380         }
381         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
382       }
383     }
384   }
385 
386   if(!baij->donotstash){
387     ierr = PetscFree(in_loc);CHKERRQ(ierr);
388     ierr = PetscFree(v_loc);CHKERRQ(ierr);
389   }
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
395 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
396 {
397   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
398   const MatScalar *value;
399   MatScalar       *barray=baij->barray;
400   PetscTruth      roworiented = baij->roworiented;
401   PetscErrorCode  ierr;
402   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
403   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
404   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
405 
406   PetscFunctionBegin;
407   if(!barray) {
408     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
409     baij->barray = barray;
410   }
411 
412   if (roworiented) {
413     stepval = (n-1)*bs;
414   } else {
415     stepval = (m-1)*bs;
416   }
417   for (i=0; i<m; i++) {
418     if (im[i] < 0) continue;
419 #if defined(PETSC_USE_DEBUG)
420     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
421 #endif
422     if (im[i] >= rstart && im[i] < rend) {
423       row = im[i] - rstart;
424       for (j=0; j<n; j++) {
425         /* If NumCol = 1 then a copy is not required */
426         if ((roworiented) && (n == 1)) {
427           barray = (MatScalar*) v + i*bs2;
428         } else if((!roworiented) && (m == 1)) {
429           barray = (MatScalar*) v + j*bs2;
430         } else { /* Here a copy is required */
431           if (roworiented) {
432             value = v + i*(stepval+bs)*bs + j*bs;
433           } else {
434             value = v + j*(stepval+bs)*bs + i*bs;
435           }
436           for (ii=0; ii<bs; ii++,value+=stepval) {
437             for (jj=0; jj<bs; jj++) {
438               *barray++  = *value++;
439             }
440           }
441           barray -=bs2;
442         }
443 
444         if (in[j] >= cstart && in[j] < cend){
445           col  = in[j] - cstart;
446           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
447         }
448         else if (in[j] < 0) continue;
449 #if defined(PETSC_USE_DEBUG)
450         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
451 #endif
452         else {
453           if (mat->was_assembled) {
454             if (!baij->colmap) {
455               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
456             }
457 
458 #if defined(PETSC_USE_DEBUG)
459 #if defined (PETSC_USE_CTABLE)
460             { PetscInt data;
461               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
462               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463             }
464 #else
465             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
466 #endif
467 #endif
468 #if defined (PETSC_USE_CTABLE)
469 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
470             col  = (col - 1)/bs;
471 #else
472             col = (baij->colmap[in[j]] - 1)/bs;
473 #endif
474             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
475               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
476               col =  in[j];
477             }
478           }
479           else col = in[j];
480           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482       }
483     } else {
484       if (!baij->donotstash) {
485         if (roworiented) {
486           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487         } else {
488           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
#undef __FUNCT__
#define __FUNCT__ "MatGetValues_MPISBAIJ"
/* Retrieve individual entries of the parallel SBAIJ matrix.  Only rows owned
   by this process may be queried (off-process rows raise PETSC_ERR_SUP).
   Columns inside the local diagonal range are read from block A; all other
   columns are translated through the colmap and read from block B, returning
   0.0 for positions not present in B's block structure. */
PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;
  PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
  PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;

  PetscFunctionBegin;
  for (i=0; i<m; i++) {
    if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
    if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
    if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
      row = idxm[i] - bsrstart;   /* local (point) row index */
      for (j=0; j<n; j++) {
        if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
        if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
        if (idxn[j] >= bscstart && idxn[j] < bscend){
          /* column lies in the diagonal block A */
          col = idxn[j] - bscstart;
          ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
        } else {
          /* column lies in the off-diagonal block B: map global block column
             to B's local block column via the colmap */
          if (!baij->colmap) {
            ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
          }
#if defined (PETSC_USE_CTABLE)
          ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
          data --;
#else
          data = baij->colmap[idxn[j]/bs]-1;
#endif
          /* block not present in B => the entry is a structural zero */
          if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
          else {
            col  = data + idxn[j]%bs;
            ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
          }
        }
      }
    } else {
      SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
    }
  }
 PetscFunctionReturn(0);
}
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatNorm_MPISBAIJ"
543 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
544 {
545   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
546   PetscErrorCode ierr;
547   PetscReal      sum[2],*lnorm2;
548 
549   PetscFunctionBegin;
550   if (baij->size == 1) {
551     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
552   } else {
553     if (type == NORM_FROBENIUS) {
554       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
555       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
556       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
557       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
558       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
559       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
560       *norm = sqrt(sum[0] + 2*sum[1]);
561       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
562     } else {
563       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
564     }
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
571 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
572 {
573   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
574   PetscErrorCode ierr;
575   PetscInt       nstash,reallocs;
576   InsertMode     addv;
577 
578   PetscFunctionBegin;
579   if (baij->donotstash) {
580     PetscFunctionReturn(0);
581   }
582 
583   /* make sure all processors are either in INSERTMODE or ADDMODE */
584   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
585   if (addv == (ADD_VALUES|INSERT_VALUES)) {
586     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
587   }
588   mat->insertmode = addv; /* in case this processor had no cache */
589 
590   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
591   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
592   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
593   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
594   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
595   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
596   PetscFunctionReturn(0);
597 }
598 
#undef __FUNCT__
#define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
/* Finish assembling the parallel SBAIJ matrix: drain the scalar and block
   stashes into the local blocks, assemble A and B, synchronize disassembly
   state across processes, and (on the first final assembly) build the
   scatter contexts needed for MatMult. */
PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
{
  Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
  Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
  Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
  PetscErrorCode ierr;
  PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
  PetscInt       *row,*col,other_disassembled;
  PetscMPIInt    n;
  PetscTruth     r1,r2,r3;
  MatScalar      *val;
  InsertMode     addv = mat->insertmode;

  PetscFunctionBegin;

  if (!baij->donotstash) {
    /* drain the scalar stash: receive messages until none remain */
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* Now assemble all these values with a single function call */
        ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
    /* Now process the block-stash. Since the values are stashed column-oriented,
       set the roworiented flag to column oriented, and after MatSetValues()
       restore the original flags */
    r1 = baij->roworiented;
    r2 = a->roworiented;
    r3 = b->roworiented;
    baij->roworiented = PETSC_FALSE;
    a->roworiented    = PETSC_FALSE;
    b->roworiented    = PETSC_FALSE;
    while (1) {
      ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
      if (!flg) break;

      for (i=0; i<n;) {
        /* Now identify the consecutive vals belonging to the same row */
        for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
        if (j < n) ncols = j-i;
        else       ncols = n-i;
        /* each stashed block carries bs2 scalars, hence the val+i*bs2 offset */
        ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
        i = j;
      }
    }
    ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
    baij->roworiented = r1;
    a->roworiented    = r2;
    b->roworiented    = r3;
  }

  ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);

  /* determine if any processor has disassembled, if so we must
     also disassemble ourselfs, in order that we may reassemble. */
  /*
     if nonzero structure of submatrix B cannot change then we know that
     no processor disassembled thus we can skip this stuff
  */
  if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
    ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
    if (mat->was_assembled && !other_disassembled) {
      ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
    }
  }

  if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
    ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
  }
  /* request compressed-row storage for B before its assembly */
  b->compressedrow.use = PETSC_TRUE;
  ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);

  /* any cached MatGetRow() work space is stale after assembly */
  if (baij->rowvalues) {
    ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
    baij->rowvalues = 0;
  }

  PetscFunctionReturn(0);
}
690 
#undef __FUNCT__
#define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
/*
   Viewer worker used by MatView_MPISBAIJ() for ASCII, draw and socket viewers.

   For the ASCII INFO formats only summary statistics are printed and the
   routine returns early.  Otherwise, on one process the local diagonal block
   is viewed directly; on multiple processes the whole matrix is first
   assembled into a temporary parallel matrix A whose rows all live on
   process 0, and process 0 then views it through a singleton viewer.
*/
static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
{
  Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode    ierr;
  PetscInt          bs = mat->bs;
  PetscMPIInt       size = baij->size,rank = baij->rank;
  PetscTruth        iascii,isdraw;
  PetscViewer       sviewer;
  PetscViewerFormat format;

  PetscFunctionBegin;
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
  ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
  if (iascii) {
    ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
    if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
      /* per-process nonzero/memory statistics; nz counts are in blocks, hence the *bs */
      MatInfo info;
      ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
      ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
              rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
              mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
      ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
      ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
      ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    } else if (format == PETSC_VIEWER_ASCII_INFO) {
      ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
      PetscFunctionReturn(0);
    }
  }

  if (isdraw) {
    /* nothing to do when the draw context is a null device */
    PetscDraw       draw;
    PetscTruth isnull;
    ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
    ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
  }

  if (size == 1) {
    ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
    ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
  } else {
    /* assemble the entire matrix onto first processor. */
    Mat         A;
    Mat_SeqSBAIJ *Aloc;
    Mat_SeqBAIJ *Bloc;
    PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
    MatScalar   *a;

    /* Should this be the same type as mat? */
    if (!rank) {
      ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
    } else {
      ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
    }
    ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
    ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
    PetscLogObjectParent(mat,A);

    /* copy over the A part: walk the diagonal block's CSR (block) structure */
    Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
    ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
    ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);

    for (i=0; i<mbs; i++) {
      /* rvals[] holds the bs consecutive global (point) row indices of block row i */
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = (baij->cstart+aj[j])*bs;
        for (k=0; k<bs; k++) {
          /* insert one column of the bs x bs block at a time; a is advanced column-wise */
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    /* copy over the B part; garray maps local off-diagonal block columns to global ones */
    Bloc = (Mat_SeqBAIJ*)baij->B->data;
    ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
    for (i=0; i<mbs; i++) {
      rvals[0] = bs*(baij->rstart + i);
      for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
      for (j=ai[i]; j<ai[i+1]; j++) {
        col = baij->garray[aj[j]]*bs;
        for (k=0; k<bs; k++) {
          ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
          col++; a += bs;
        }
      }
    }
    ierr = PetscFree(rvals);CHKERRQ(ierr);
    ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
    /*
       Everyone has to call to draw the matrix since the graphics waits are
       synchronized across all processors that share the PetscDraw object
    */
    ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
    if (!rank) {
      ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
      ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
    }
    ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
    ierr = MatDestroy(A);CHKERRQ(ierr);
  }
  PetscFunctionReturn(0);
}
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatView_MPISBAIJ"
806 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
807 {
808   PetscErrorCode ierr;
809   PetscTruth     iascii,isdraw,issocket,isbinary;
810 
811   PetscFunctionBegin;
812   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
814   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
815   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
816   if (iascii || isdraw || issocket || isbinary) {
817     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
818   } else {
819     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
820   }
821   PetscFunctionReturn(0);
822 }
823 
#undef __FUNCT__
#define __FUNCT__ "MatDestroy_MPISBAIJ"
/*
   Releases all storage owned by a parallel SBAIJ matrix: the value stashes,
   the sequential diagonal (A) and off-diagonal (B) blocks, the column map,
   the scatter contexts and work vectors, and finally the Mat_MPISBAIJ
   structure itself.  Optional members are destroyed only if they were
   ever created (non-NULL checks).
*/
PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
#if defined(PETSC_USE_LOG)
  PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
#endif
  ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
  ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
  ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
  ierr = MatDestroy(baij->A);CHKERRQ(ierr);
  ierr = MatDestroy(baij->B);CHKERRQ(ierr);
  /* colmap is either a hash table or a plain array depending on configuration */
#if defined (PETSC_USE_CTABLE)
  if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
#else
  if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
#endif
  if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
  if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
  if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
  /* slvec0/slvec1 and their sub-vectors implement the single-scatter MatMult scheme */
  if (baij->slvec0) {
    ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
  }
  if (baij->slvec1) {
    ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
    ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
  }
  if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
  if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
  if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
  if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
#if defined(PETSC_USE_MAT_SINGLE)
  if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
#endif
  ierr = PetscFree(baij);CHKERRQ(ierr);

  /* unregister the composed methods so stale function pointers cannot be invoked */
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatMult_MPISBAIJ"
875 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
876 {
877   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
878   PetscErrorCode ierr;
879   PetscInt       nt,mbs=a->mbs,bs=A->bs;
880   PetscScalar    *x,*from,zero=0.0;
881 
882   PetscFunctionBegin;
883   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
884   if (nt != A->n) {
885     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
886   }
887   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
888   if (nt != A->m) {
889     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
890   }
891 
892   /* diagonal part */
893   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
894   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
895 
896   /* subdiagonal part */
897   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
898 
899   /* copy x into the vec slvec0 */
900   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
901   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
902   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
903   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
904 
905   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
906   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
908 
909   /* supperdiagonal part */
910   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
911 
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
917 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
918 {
919   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
920   PetscErrorCode ierr;
921   PetscInt       nt;
922 
923   PetscFunctionBegin;
924   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
925   if (nt != A->n) {
926     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
927   }
928   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
929   if (nt != A->m) {
930     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
931   }
932 
933   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
934   /* do diagonal part */
935   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
936   /* do supperdiagonal part */
937   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
938   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
939   /* do subdiagonal part */
940   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
941   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
942   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
943 
944   PetscFunctionReturn(0);
945 }
946 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ"
/*
   zz = yy + A*xx using the single-scatter "slvec" scheme (mirrors
   MatMult_MPISBAIJ, with yy folded into the diagonal product):
     1. slvec1a = yy + A_diag*xx
     2. slvec0b = B^T*xx
     3. scatter slvec0 = [xx ; slvec0b] into slvec1 with ADD_VALUES
     4. zz = B*slvec1b + slvec1a
*/
PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;
  PetscInt       mbs=a->mbs,bs=A->bs;
  PetscScalar    *x,*from,zero=0.0;

  PetscFunctionBegin;
  /* diagonal part (yy added in here) */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
  ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);

  /* subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);

  /* copy x into the vec slvec0 */
  ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
  ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
  ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
  ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);

  ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
  ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);

  /* superdiagonal part */
  ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
983 
#undef __FUNCT__
#define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
/*
   zz = yy + A*xx using the two-scatter scheme (see MatMult_MPISBAIJ_2comm):
   forward scatter for the superdiagonal product, reverse ADD_VALUES scatter
   for the subdiagonal (B^T) contribution.
*/
PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
{
  Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  /* do diagonal part */
  ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
  /* do superdiagonal part */
  ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
  ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);

  /* do subdiagonal part */
  ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
  ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
  ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);

  PetscFunctionReturn(0);
}
1006 
#undef __FUNCT__
#define __FUNCT__ "MatMultTranspose_MPISBAIJ"
/* yy = A^T*xx; the matrix is symmetric, so this is just MatMult(). */
PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1017 
#undef __FUNCT__
#define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
/* zz = yy + A^T*xx; the matrix is symmetric, so this is just MatMultAdd(). */
PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1028 
1029 /*
1030   This only works correctly for square matrices where the subblock A->A is the
1031    diagonal block
1032 */
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1035 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1036 {
1037   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1042   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatScale_MPISBAIJ"
1048 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1049 {
1050   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1055   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
#undef __FUNCT__
#define __FUNCT__ "MatGetRow_MPISBAIJ"
/*
   Returns one (local) global row of the matrix in global point indices, by
   merging the row of the diagonal block A with the row of the off-diagonal
   block B.  Both rows are assumed sorted by column; B's entries are split
   into those left of the diagonal block (columns < cstart) and those right
   of it, so the merged output is sorted.  Work space (rowvalues/rowindices)
   sized for the longest row is allocated lazily on first use and freed in
   MatDestroy/MatAssemblyEnd.  Must be paired with MatRestoreRow_MPISBAIJ().
*/
PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
{
  Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
  PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
  PetscErrorCode ierr;
  PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
  PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
  PetscInt       *cmap,*idx_p,cstart = mat->cstart;

  PetscFunctionBegin;
  if (mat->getrowactive) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
  mat->getrowactive = PETSC_TRUE;

  if (!mat->rowvalues && (idx || v)) {
    /*
        allocate enough space to hold information from the longest row.
    */
    Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
    Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
    PetscInt     max = 1,mbs = mat->mbs,tmp;
    for (i=0; i<mbs; i++) {
      tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
      if (max < tmp) { max = tmp; }
    }
    /* single allocation: values first, indices appended after them */
    ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
    mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
  }

  /* NOTE: no semicolon after SETERRQ here -- the macro supplies the statement */
  if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
  lrow = row - brstart;  /* local row index */

  /* request only the pieces (values and/or indices) the caller asked for */
  pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
  if (!v)   {pvA = 0; pvB = 0;}
  if (!idx) {pcA = 0; if (!v) pcB = 0;}
  ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  nztot = nzA + nzB;

  cmap  = mat->garray;
  if (v  || idx) {
    if (nztot) {
      /* Sort by increasing column numbers, assuming A and B already sorted */
      /* imark = number of B entries whose global column lies left of the diagonal block */
      PetscInt imark = -1;
      if (v) {
        *v = v_p = mat->rowvalues;
        for (i=0; i<nzB; i++) {
          if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
          else break;
        }
        imark = i;
        for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
        for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
      }
      if (idx) {
        *idx = idx_p = mat->rowindices;
        if (imark > -1) {
          /* imark already computed by the values pass above */
          for (i=0; i<imark; i++) {
            idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
          }
        } else {
          for (i=0; i<nzB; i++) {
            if (cmap[cworkB[i]/bs] < cstart)
              idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
            else break;
          }
          imark = i;
        }
        for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
        for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
      }
    } else {
      if (idx) *idx = 0;
      if (v)   *v   = 0;
    }
  }
  *nz = nztot;
  ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
  ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1144 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1145 {
1146   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1147 
1148   PetscFunctionBegin;
1149   if (!baij->getrowactive) {
1150     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1151   }
1152   baij->getrowactive = PETSC_FALSE;
1153   PetscFunctionReturn(0);
1154 }
1155 
1156 #undef __FUNCT__
1157 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1158 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1159 {
1160   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1161   PetscErrorCode ierr;
1162 
1163   PetscFunctionBegin;
1164   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1165   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1171 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1172 {
1173   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1174   Mat            A = a->A,B = a->B;
1175   PetscErrorCode ierr;
1176   PetscReal      isend[5],irecv[5];
1177 
1178   PetscFunctionBegin;
1179   info->block_size     = (PetscReal)matin->bs;
1180   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1181   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1182   isend[3] = info->memory;  isend[4] = info->mallocs;
1183   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1184   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1185   isend[3] += info->memory;  isend[4] += info->mallocs;
1186   if (flag == MAT_LOCAL) {
1187     info->nz_used      = isend[0];
1188     info->nz_allocated = isend[1];
1189     info->nz_unneeded  = isend[2];
1190     info->memory       = isend[3];
1191     info->mallocs      = isend[4];
1192   } else if (flag == MAT_GLOBAL_MAX) {
1193     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1194     info->nz_used      = irecv[0];
1195     info->nz_allocated = irecv[1];
1196     info->nz_unneeded  = irecv[2];
1197     info->memory       = irecv[3];
1198     info->mallocs      = irecv[4];
1199   } else if (flag == MAT_GLOBAL_SUM) {
1200     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1201     info->nz_used      = irecv[0];
1202     info->nz_allocated = irecv[1];
1203     info->nz_unneeded  = irecv[2];
1204     info->memory       = irecv[3];
1205     info->mallocs      = irecv[4];
1206   } else {
1207     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1208   }
1209   info->rows_global       = (PetscReal)A->M;
1210   info->columns_global    = (PetscReal)A->N;
1211   info->rows_local        = (PetscReal)A->m;
1212   info->columns_local     = (PetscReal)A->N;
1213   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1214   info->fill_ratio_needed = 0;
1215   info->factor_mallocs    = 0;
1216   PetscFunctionReturn(0);
1217 }
1218 
1219 #undef __FUNCT__
1220 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1221 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1222 {
1223   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1224   PetscErrorCode ierr;
1225 
1226   PetscFunctionBegin;
1227   switch (op) {
1228   case MAT_NO_NEW_NONZERO_LOCATIONS:
1229   case MAT_YES_NEW_NONZERO_LOCATIONS:
1230   case MAT_COLUMNS_UNSORTED:
1231   case MAT_COLUMNS_SORTED:
1232   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1233   case MAT_KEEP_ZEROED_ROWS:
1234   case MAT_NEW_NONZERO_LOCATION_ERR:
1235     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1236     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1237     break;
1238   case MAT_ROW_ORIENTED:
1239     a->roworiented = PETSC_TRUE;
1240     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1241     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1242     break;
1243   case MAT_ROWS_SORTED:
1244   case MAT_ROWS_UNSORTED:
1245   case MAT_YES_NEW_DIAGONALS:
1246     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1247     break;
1248   case MAT_COLUMN_ORIENTED:
1249     a->roworiented = PETSC_FALSE;
1250     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1251     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1252     break;
1253   case MAT_IGNORE_OFF_PROC_ENTRIES:
1254     a->donotstash = PETSC_TRUE;
1255     break;
1256   case MAT_NO_NEW_DIAGONALS:
1257     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1258   case MAT_USE_HASH_TABLE:
1259     a->ht_flag = PETSC_TRUE;
1260     break;
1261   case MAT_NOT_SYMMETRIC:
1262   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1263   case MAT_HERMITIAN:
1264     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1265   case MAT_SYMMETRIC:
1266   case MAT_STRUCTURALLY_SYMMETRIC:
1267   case MAT_NOT_HERMITIAN:
1268   case MAT_SYMMETRY_ETERNAL:
1269   case MAT_NOT_SYMMETRY_ETERNAL:
1270     break;
1271   default:
1272     SETERRQ(PETSC_ERR_SUP,"unknown option");
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276 
#undef __FUNCT__
#define __FUNCT__ "MatTranspose_MPISBAIJ"
/* *B = A^T; the matrix is symmetric, so the transpose is simply a copy. */
PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
{
  PetscErrorCode ierr;
  PetscFunctionBegin;
  ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1286 
#undef __FUNCT__
#define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
/*
   mat = diag(ll)*mat*diag(rr).  Because the storage is symmetric, ll and rr
   must be the same vector; a NULL pair is a no-op.  The off-diagonal block
   is scaled on the left locally and on the right with the scattered copy of
   rr; the scatter is started early so it overlaps the left scaling and the
   diagonal-block scaling.  The commented-out braces record that the former
   separate if(ll)/if(rr) branches were merged into the single if(rr) body.
*/
PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
{
  Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
  Mat            a = baij->A,b = baij->B;
  PetscErrorCode ierr;
  PetscInt       s1,s2,s3;

  PetscFunctionBegin;
  if (ll != rr) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
  }
  ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    /*} if (ll) { */
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
    /* } */
  /* scale  the diagonal block */
  ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

  /* if (rr) { */
    /* Do a scatter end and then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }

  PetscFunctionReturn(0);
}
1322 
1323 #undef __FUNCT__
1324 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1325 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1326 {
1327   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1328   MPI_Comm          comm = A->comm;
1329   static PetscTruth called = PETSC_FALSE;
1330   PetscErrorCode    ierr;
1331 
1332   PetscFunctionBegin;
1333   if (!a->rank) {
1334     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1335   }
1336   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1337   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1338   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1339   PetscFunctionReturn(0);
1340 }
1341 
1342 #undef __FUNCT__
1343 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1344 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1345 {
1346   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1347   PetscErrorCode ierr;
1348 
1349   PetscFunctionBegin;
1350   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1351   PetscFunctionReturn(0);
1352 }
1353 
1354 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatEqual_MPISBAIJ"
1358 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1359 {
1360   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1361   Mat            a,b,c,d;
1362   PetscTruth     flg;
1363   PetscErrorCode ierr;
1364 
1365   PetscFunctionBegin;
1366   a = matA->A; b = matA->B;
1367   c = matB->A; d = matB->B;
1368 
1369   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1370   if (flg) {
1371     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1372   }
1373   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1374   PetscFunctionReturn(0);
1375 }
1376 
#undef __FUNCT__
#define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
/* Fallback preallocation (block size 1, default nonzero estimates), used
   when assembly starts without an explicit preallocation call. */
PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}
1387 
1388 #undef __FUNCT__
1389 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1390 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1391 {
1392   PetscErrorCode ierr;
1393   PetscInt       i;
1394   PetscTruth     flg;
1395 
1396   PetscFunctionBegin;
1397   for (i=0; i<n; i++) {
1398     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1399     if (!flg) {
1400       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1401     }
1402   }
1403   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1404   PetscFunctionReturn(0);
1405 }
1406 
1407 
1408 /* -------------------------------------------------------------------*/
/* Virtual function table for the MPISBAIJ type.  Entries are positional
   (the slot index is tracked by the numeric comments); a 0 entry means the
   operation is not implemented for this matrix type. */
static struct _MatOps MatOps_Values = {
       MatSetValues_MPISBAIJ,
       MatGetRow_MPISBAIJ,
       MatRestoreRow_MPISBAIJ,
       MatMult_MPISBAIJ,
/* 4*/ MatMultAdd_MPISBAIJ,
       MatMultTranspose_MPISBAIJ,
       MatMultTransposeAdd_MPISBAIJ,
       0,
       0,
       0,
/*10*/ 0,
       0,
       0,
       MatRelax_MPISBAIJ,
       MatTranspose_MPISBAIJ,
/*15*/ MatGetInfo_MPISBAIJ,
       MatEqual_MPISBAIJ,
       MatGetDiagonal_MPISBAIJ,
       MatDiagonalScale_MPISBAIJ,
       MatNorm_MPISBAIJ,
/*20*/ MatAssemblyBegin_MPISBAIJ,
       MatAssemblyEnd_MPISBAIJ,
       0,
       MatSetOption_MPISBAIJ,
       MatZeroEntries_MPISBAIJ,
/*25*/ 0,
       0,
       0,
       0,
       0,
/*30*/ MatSetUpPreallocation_MPISBAIJ,
       0,
       0,
       0,
       0,
/*35*/ MatDuplicate_MPISBAIJ,
       0,
       0,
       0,
       0,
/*40*/ 0,
       MatGetSubMatrices_MPISBAIJ,
       MatIncreaseOverlap_MPISBAIJ,
       MatGetValues_MPISBAIJ,
       0,
/*45*/ MatPrintHelp_MPISBAIJ,
       MatScale_MPISBAIJ,
       0,
       0,
       0,
/*50*/ 0,
       0,
       0,
       0,
       0,
/*55*/ 0,
       0,
       MatSetUnfactored_MPISBAIJ,
       0,
       MatSetValuesBlocked_MPISBAIJ,
/*60*/ 0,
       0,
       0,
       MatGetPetscMaps_Petsc,
       0,
/*65*/ 0,
       0,
       0,
       0,
       0,
/*70*/ MatGetRowMax_MPISBAIJ,
       0,
       0,
       0,
       0,
/*75*/ 0,
       0,
       0,
       0,
       0,
/*80*/ 0,
       0,
       0,
       0,
       MatLoad_MPISBAIJ,
/*85*/ 0,
       0,
       0,
       0,
       0,
/*90*/ 0,
       0,
       0,
       0,
       0,
/*95*/ 0,
       0,
       0,
       0};
1509 
1510 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
/* Returns (by reference, never a copy) the sequential matrix holding this
   process's diagonal block; the reuse argument is not used here. */
PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
{
  PetscFunctionBegin;
  *a      = ((Mat_MPISBAIJ *)A->data)->A;
  *iscopy = PETSC_FALSE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
1522 
1523 EXTERN_C_BEGIN
1524 #undef __FUNCT__
1525 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1526 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1527 {
1528   Mat_MPISBAIJ   *b;
1529   PetscErrorCode ierr;
1530   PetscInt       i,mbs,Mbs;
1531 
1532   PetscFunctionBegin;
1533   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1534 
1535   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1536   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1537   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1538   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1539   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1540   if (d_nnz) {
1541     for (i=0; i<B->m/bs; i++) {
1542       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1543     }
1544   }
1545   if (o_nnz) {
1546     for (i=0; i<B->m/bs; i++) {
1547       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1548     }
1549   }
1550   B->preallocated = PETSC_TRUE;
1551   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1552   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1553   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1554   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1555 
1556   b   = (Mat_MPISBAIJ*)B->data;
1557   mbs = B->m/bs;
1558   Mbs = B->M/bs;
1559   if (mbs*bs != B->m) {
1560     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1561   }
1562 
1563   B->bs  = bs;
1564   b->bs2 = bs*bs;
1565   b->mbs = mbs;
1566   b->nbs = mbs;
1567   b->Mbs = Mbs;
1568   b->Nbs = Mbs;
1569 
1570   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1571   b->rowners[0]    = 0;
1572   for (i=2; i<=b->size; i++) {
1573     b->rowners[i] += b->rowners[i-1];
1574   }
1575   b->rstart    = b->rowners[b->rank];
1576   b->rend      = b->rowners[b->rank+1];
1577   b->cstart    = b->rstart;
1578   b->cend      = b->rend;
1579   for (i=0; i<=b->size; i++) {
1580     b->rowners_bs[i] = b->rowners[i]*bs;
1581   }
1582   b->rstart_bs = b-> rstart*bs;
1583   b->rend_bs   = b->rend*bs;
1584 
1585   b->cstart_bs = b->cstart*bs;
1586   b->cend_bs   = b->cend*bs;
1587 
1588   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1589   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1590   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1591   PetscLogObjectParent(B,b->A);
1592 
1593   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1594   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1595   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1596   PetscLogObjectParent(B,b->B);
1597 
1598   /* build cache for off array entries formed */
1599   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1600 
1601   PetscFunctionReturn(0);
1602 }
1603 EXTERN_C_END
1604 
1605 /*MC
1606    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1607    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1608 
1609    Options Database Keys:
1610 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1611 
1612   Level: beginner
1613 
1614 .seealso: MatCreateMPISBAIJ
1615 M*/
1616 
EXTERN_C_BEGIN
#undef __FUNCT__
#define __FUNCT__ "MatCreate_MPISBAIJ"
/*
   MatCreate_MPISBAIJ - Type constructor for MATMPISBAIJ.

   Allocates the Mat_MPISBAIJ implementation struct, installs the shared
   operations table, initializes the ownership tables and all cached
   vectors/scatters to empty, and composes the type-specific functions
   (store/retrieve values, diagonal block, preallocation) that generic
   front ends query by name.  The symmetry flags are set unconditionally
   at the end because SBAIJ stores only the upper triangle of a matrix
   that is symmetric by definition.
*/
PetscErrorCode MatCreate_MPISBAIJ(Mat B)
{
  Mat_MPISBAIJ   *b;
  PetscErrorCode ierr;
  PetscTruth     flg;

  PetscFunctionBegin;

  ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
  B->data = (void*)b;
  /* install the static table of MPISBAIJ operations (defined earlier in this file) */
  ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);

  B->ops->destroy    = MatDestroy_MPISBAIJ;
  B->ops->view       = MatView_MPISBAIJ;
  B->mapping    = 0;
  B->factor     = 0;
  B->assembled  = PETSC_FALSE;

  B->insertmode = NOT_SET_VALUES;
  ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
  ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);

  /* build local table of row and column ownerships: a single allocation
     holds rowners, cowners and rowners_bs (size+2 entries each) back to back */
  ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
  b->cowners    = b->rowners + b->size + 2;
  b->rowners_bs = b->cowners + b->size + 2;
  PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));

  /* build cache for off array entries formed */
  ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
  b->donotstash  = PETSC_FALSE;
  b->colmap      = PETSC_NULL;
  b->garray      = PETSC_NULL;
  b->roworiented = PETSC_TRUE;

#if defined(PETSC_USE_MAT_SINGLE)
  /* stuff for MatSetValues_XXX in single precision */
  b->setvalueslen     = 0;
  b->setvaluescopy    = PETSC_NULL;
#endif

  /* stuff used in block assembly */
  b->barray       = 0;

  /* stuff used for matrix vector multiply; created lazily during MatAssemblyEnd/MatSetUpMultiply */
  b->lvec         = 0;
  b->Mvctx        = 0;
  b->slvec0       = 0;
  b->slvec0b      = 0;
  b->slvec1       = 0;
  b->slvec1a      = 0;
  b->slvec1b      = 0;
  b->sMvctx       = 0;

  /* stuff for MatGetRow() */
  b->rowindices   = 0;
  b->rowvalues    = 0;
  b->getrowactive = PETSC_FALSE;

  /* hash table stuff */
  b->ht           = 0;
  b->hd           = 0;
  b->ht_size      = 0;
  b->ht_flag      = PETSC_FALSE;
  b->ht_fact      = 0;
  b->ht_total_ct  = 0;
  b->ht_insert_ct = 0;

  ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
  if (flg) {
    PetscReal fact = 1.39;
    ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
    ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
    /* factors <= 1 would not grow the table; fall back to the default */
    if (fact <= 1.0) fact = 1.39;
    /* NOTE(review): this calls the BAIJ (not SBAIJ) hash-table setter; no
       SBAIJ variant exists (see the XXXXX-disabled doc stub below), but it
       presumably relies on Mat_MPIBAIJ and Mat_MPISBAIJ sharing the ht_fact
       layout -- confirm against the struct definitions */
    ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
  }
  /* compose the named operations generic code looks up with PetscObjectQueryFunction() */
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
                                     "MatStoreValues_MPISBAIJ",
                                     MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
                                     "MatRetrieveValues_MPISBAIJ",
                                     MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
                                     "MatGetDiagonalBlock_MPISBAIJ",
                                     MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
  ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
                                     "MatMPISBAIJSetPreallocation_MPISBAIJ",
                                     MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
  /* SBAIJ matrices are symmetric by construction */
  B->symmetric                  = PETSC_TRUE;
  B->structurally_symmetric     = PETSC_TRUE;
  B->symmetric_set              = PETSC_TRUE;
  B->structurally_symmetric_set = PETSC_TRUE;
  PetscFunctionReturn(0);
}
EXTERN_C_END
1716 
1717 /*MC
1718    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1719 
1720    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1721    and MATMPISBAIJ otherwise.
1722 
1723    Options Database Keys:
1724 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1725 
1726   Level: beginner
1727 
1728 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1729 M*/
1730 
1731 EXTERN_C_BEGIN
1732 #undef __FUNCT__
1733 #define __FUNCT__ "MatCreate_SBAIJ"
1734 PetscErrorCode MatCreate_SBAIJ(Mat A)
1735 {
1736   PetscErrorCode ierr;
1737   PetscMPIInt    size;
1738 
1739   PetscFunctionBegin;
1740   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1741   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1742   if (size == 1) {
1743     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1744   } else {
1745     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1746   }
1747   PetscFunctionReturn(0);
1748 }
1749 EXTERN_C_END
1750 
1751 #undef __FUNCT__
1752 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1753 /*@C
1754    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1755    the user should preallocate the matrix storage by setting the parameters
1756    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1757    performance can be increased by more than a factor of 50.
1758 
1759    Collective on Mat
1760 
1761    Input Parameters:
1762 +  A - the matrix
.  bs   - size of block
1764 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1765            submatrix  (same for all local rows)
1766 .  d_nnz - array containing the number of block nonzeros in the various block rows
1767            in the upper triangular and diagonal part of the in diagonal portion of the local
1768            (possibly different for each block row) or PETSC_NULL.  You must leave room
1769            for the diagonal entry even if it is zero.
1770 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1771            submatrix (same for all local rows).
1772 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1773            off-diagonal portion of the local submatrix (possibly different for
1774            each block row) or PETSC_NULL.
1775 
1776 
1777    Options Database Keys:
1778 .   -mat_no_unroll - uses code that does not unroll the loops in the
1779                      block calculations (much slower)
1780 .   -mat_block_size - size of the blocks to use
1781 
1782    Notes:
1783 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1786 
1787    If the *_nnz parameter is given then the *_nz parameter is ignored
1788 
1789    Storage Information:
1790    For a square global matrix we define each processor's diagonal portion
1791    to be its local rows and the corresponding columns (a square submatrix);
1792    each processor's off-diagonal portion encompasses the remainder of the
1793    local matrix (a rectangular submatrix).
1794 
1795    The user can specify preallocated storage for the diagonal part of
1796    the local submatrix with either d_nz or d_nnz (not both).  Set
1797    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1798    memory allocation.  Likewise, specify preallocated storage for the
1799    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1800 
1801    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1802    the figure below we depict these three local rows and all columns (0-11).
1803 
1804 .vb
1805            0 1 2 3 4 5 6 7 8 9 10 11
1806           -------------------
1807    row 3  |  o o o d d d o o o o o o
1808    row 4  |  o o o d d d o o o o o o
1809    row 5  |  o o o d d d o o o o o o
1810           -------------------
1811 .ve
1812 
1813    Thus, any entries in the d locations are stored in the d (diagonal)
1814    submatrix, and any entries in the o locations are stored in the
1815    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1816    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1817 
1818    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1819    plus the diagonal part of the d matrix,
1820    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1821    In general, for PDE problems in which most nonzeros are near the diagonal,
1822    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1823    or you will get TERRIBLE performance; see the users' manual chapter on
1824    matrices.
1825 
1826    Level: intermediate
1827 
1828 .keywords: matrix, block, aij, compressed row, sparse, parallel
1829 
1830 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1831 @*/
1832 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1833 {
1834   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1835 
1836   PetscFunctionBegin;
1837   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1838   if (f) {
1839     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1840   }
1841   PetscFunctionReturn(0);
1842 }
1843 
1844 #undef __FUNCT__
1845 #define __FUNCT__ "MatCreateMPISBAIJ"
1846 /*@C
1847    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1848    (block compressed row).  For good matrix assembly performance
1849    the user should preallocate the matrix storage by setting the parameters
1850    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1851    performance can be increased by more than a factor of 50.
1852 
1853    Collective on MPI_Comm
1854 
1855    Input Parameters:
1856 +  comm - MPI communicator
.  bs   - size of block
1858 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1859            This value should be the same as the local size used in creating the
1860            y vector for the matrix-vector product y = Ax.
1861 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1862            This value should be the same as the local size used in creating the
1863            x vector for the matrix-vector product y = Ax.
1864 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1865 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1866 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1867            submatrix  (same for all local rows)
1868 .  d_nnz - array containing the number of block nonzeros in the various block rows
1869            in the upper triangular portion of the in diagonal portion of the local
           (possibly different for each block row) or PETSC_NULL.
1871            You must leave room for the diagonal entry even if it is zero.
1872 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1873            submatrix (same for all local rows).
1874 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1875            off-diagonal portion of the local submatrix (possibly different for
1876            each block row) or PETSC_NULL.
1877 
1878    Output Parameter:
1879 .  A - the matrix
1880 
1881    Options Database Keys:
1882 .   -mat_no_unroll - uses code that does not unroll the loops in the
1883                      block calculations (much slower)
1884 .   -mat_block_size - size of the blocks to use
1885 .   -mat_mpi - use the parallel matrix data structures even on one processor
1886                (defaults to using SeqBAIJ format on one processor)
1887 
1888    Notes:
1889    The user MUST specify either the local or global matrix dimensions
1890    (possibly both).
1891 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1894 
1895    If the *_nnz parameter is given then the *_nz parameter is ignored
1896 
1897    Storage Information:
1898    For a square global matrix we define each processor's diagonal portion
1899    to be its local rows and the corresponding columns (a square submatrix);
1900    each processor's off-diagonal portion encompasses the remainder of the
1901    local matrix (a rectangular submatrix).
1902 
1903    The user can specify preallocated storage for the diagonal part of
1904    the local submatrix with either d_nz or d_nnz (not both).  Set
1905    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1906    memory allocation.  Likewise, specify preallocated storage for the
1907    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1908 
1909    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1910    the figure below we depict these three local rows and all columns (0-11).
1911 
1912 .vb
1913            0 1 2 3 4 5 6 7 8 9 10 11
1914           -------------------
1915    row 3  |  o o o d d d o o o o o o
1916    row 4  |  o o o d d d o o o o o o
1917    row 5  |  o o o d d d o o o o o o
1918           -------------------
1919 .ve
1920 
1921    Thus, any entries in the d locations are stored in the d (diagonal)
1922    submatrix, and any entries in the o locations are stored in the
1923    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1924    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1925 
1926    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1927    plus the diagonal part of the d matrix,
1928    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1929    In general, for PDE problems in which most nonzeros are near the diagonal,
1930    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1931    or you will get TERRIBLE performance; see the users' manual chapter on
1932    matrices.
1933 
1934    Level: intermediate
1935 
1936 .keywords: matrix, block, aij, compressed row, sparse, parallel
1937 
1938 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1939 @*/
1940 
1941 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1942 {
1943   PetscErrorCode ierr;
1944   PetscMPIInt    size;
1945 
1946   PetscFunctionBegin;
1947   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1948   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1949   if (size > 1) {
1950     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1951     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1952   } else {
1953     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1954     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1955   }
1956   PetscFunctionReturn(0);
1957 }
1958 
1959 
1960 #undef __FUNCT__
1961 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1962 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1963 {
1964   Mat            mat;
1965   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1966   PetscErrorCode ierr;
1967   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1968   PetscScalar    *array;
1969 
1970   PetscFunctionBegin;
1971   *newmat       = 0;
1972   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1973   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1974   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1975 
1976   mat->factor       = matin->factor;
1977   mat->preallocated = PETSC_TRUE;
1978   mat->assembled    = PETSC_TRUE;
1979   mat->insertmode   = NOT_SET_VALUES;
1980 
1981   a = (Mat_MPISBAIJ*)mat->data;
1982   mat->bs  = matin->bs;
1983   a->bs2   = oldmat->bs2;
1984   a->mbs   = oldmat->mbs;
1985   a->nbs   = oldmat->nbs;
1986   a->Mbs   = oldmat->Mbs;
1987   a->Nbs   = oldmat->Nbs;
1988 
1989   a->rstart       = oldmat->rstart;
1990   a->rend         = oldmat->rend;
1991   a->cstart       = oldmat->cstart;
1992   a->cend         = oldmat->cend;
1993   a->size         = oldmat->size;
1994   a->rank         = oldmat->rank;
1995   a->donotstash   = oldmat->donotstash;
1996   a->roworiented  = oldmat->roworiented;
1997   a->rowindices   = 0;
1998   a->rowvalues    = 0;
1999   a->getrowactive = PETSC_FALSE;
2000   a->barray       = 0;
2001   a->rstart_bs    = oldmat->rstart_bs;
2002   a->rend_bs      = oldmat->rend_bs;
2003   a->cstart_bs    = oldmat->cstart_bs;
2004   a->cend_bs      = oldmat->cend_bs;
2005 
2006   /* hash table stuff */
2007   a->ht           = 0;
2008   a->hd           = 0;
2009   a->ht_size      = 0;
2010   a->ht_flag      = oldmat->ht_flag;
2011   a->ht_fact      = oldmat->ht_fact;
2012   a->ht_total_ct  = 0;
2013   a->ht_insert_ct = 0;
2014 
2015   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2016   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2017   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2018   if (oldmat->colmap) {
2019 #if defined (PETSC_USE_CTABLE)
2020     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2021 #else
2022     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2023     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
2024     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2025 #endif
2026   } else a->colmap = 0;
2027 
2028   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2029     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2030     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
2031     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2032   } else a->garray = 0;
2033 
2034   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2035   PetscLogObjectParent(mat,a->lvec);
2036   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2037   PetscLogObjectParent(mat,a->Mvctx);
2038 
2039   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2040   PetscLogObjectParent(mat,a->slvec0);
2041   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2042   PetscLogObjectParent(mat,a->slvec1);
2043 
2044   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2045   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2046   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2047   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2048   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2049   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2050   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2051   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2052   PetscLogObjectParent(mat,a->slvec0);
2053   PetscLogObjectParent(mat,a->slvec1);
2054   PetscLogObjectParent(mat,a->slvec0b);
2055   PetscLogObjectParent(mat,a->slvec1a);
2056   PetscLogObjectParent(mat,a->slvec1b);
2057 
2058   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2059   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2060   a->sMvctx = oldmat->sMvctx;
2061   PetscLogObjectParent(mat,a->sMvctx);
2062 
2063   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2064   PetscLogObjectParent(mat,a->A);
2065   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2066   PetscLogObjectParent(mat,a->B);
2067   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2068   *newmat = mat;
2069   PetscFunctionReturn(0);
2070 }
2071 
2072 #include "petscsys.h"
2073 
#undef __FUNCT__
#define __FUNCT__ "MatLoad_MPISBAIJ"
/*
   MatLoad_MPISBAIJ - Loads a matrix from a PETSc binary viewer into a new
   MPISBAIJ matrix.  Process 0 reads the header, row lengths, column
   indices and values from the file and ships each process its share;
   every process then computes per-block-row diagonal/off-diagonal
   nonzero counts for preallocation and inserts its rows.  If the global
   size is not divisible by the block size (-matload_block_size), extra
   identity rows/columns are appended as padding.
*/
PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
{
  Mat            A;
  PetscErrorCode ierr;
  PetscInt       i,nz,j,rstart,rend;
  PetscScalar    *vals,*buf;
  MPI_Comm       comm = ((PetscObject)viewer)->comm;
  MPI_Status     status;
  PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
  PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
  PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
  PetscInt       bs=1,Mbs,mbs,extra_rows;
  PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
  PetscInt       dcount,kmax,k,nzcount,tmp;
  int            fd;

  PetscFunctionBegin;
  ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);

  ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  /* process 0 reads and validates the file header: cookie, M, N, total nnz */
  if (!rank) {
    ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
    if (header[3] < 0) {
      SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
    }
  }

  ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
  M = header[1]; N = header[2];

  if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");

  /*
     This code adds extra rows to make sure the number of rows is
     divisible by the blocksize
  */
  Mbs        = M/bs;
  extra_rows = bs - M + bs*(Mbs);
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows &&!rank) {
    PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
  }

  /* determine ownership of all rows: block rows are dealt out as evenly as
     possible, with the first (Mbs % size) processes getting one extra */
  mbs        = Mbs/size + ((Mbs % size) > rank);
  m          = mbs*bs;
  ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
  browners   = rowners + size + 1;
  /* NOTE(review): mbs is a PetscInt gathered as MPI_INT into a PetscMPIInt
     buffer -- fine when PetscInt is int, but verify for 64-bit-index builds */
  ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
  rowners[0] = 0;
  for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
  for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;   /* point-row (scaled) offsets */
  rstart = rowners[rank];
  rend   = rowners[rank+1];

  /* distribute row lengths to all processors */
  ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
  if (!rank) {
    ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
    ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
    /* each padding row holds a single (diagonal) entry */
    for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
    ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
    for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
    ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
    ierr = PetscFree(sndcounts);CHKERRQ(ierr);
  } else {
    ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
  }

  if (!rank) {   /* procs[0] */
    /* calculate the number of nonzeros on each processor */
    ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
    ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
    for (i=0; i<size; i++) {
      for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
        procsnz[i] += rowlengths[j];
      }
    }
    ierr = PetscFree(rowlengths);CHKERRQ(ierr);

    /* determine max buffer needed and allocate it */
    maxnz = 0;
    for (i=0; i<size; i++) {
      maxnz = PetscMax(maxnz,procsnz[i]);
    }
    ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);

    /* read in my part of the matrix column indices  */
    nz     = procsnz[0];
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* when serial, this process also owns the padding rows, which are not on disk */
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
    if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }

    /* read in every ones (except the last) and ship off */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
    }
    /* read in the stuff for the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
      ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
      ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(cols);CHKERRQ(ierr);
  } else {  /* procs[i], i>0 */
    /* determine buffer space needed for message */
    nz = 0;
    for (i=0; i<m; i++) {
      nz += locrowlens[i];
    }
    ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
    mycols = ibuf;
    /* receive message of column indices*/
    ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
    ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
  }

  /* loop over local rows, determining number of off diagonal entries;
     mask[] marks block columns already seen in the current block row */
  ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
  odlens   = dlens + (rend-rstart);
  ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
  ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
  masked1  = mask    + Mbs;
  masked2  = masked1 + Mbs;
  rowcount = 0; nzcount = 0;
  for (i=0; i<mbs; i++) {
    dcount  = 0;
    odcount = 0;
    for (j=0; j<bs; j++) {
      kmax = locrowlens[rowcount];
      for (k=0; k<kmax; k++) {
        tmp = mycols[nzcount++]/bs; /* block col. index */
        if (!mask[tmp]) {
          mask[tmp] = 1;
          if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
          else masked1[dcount++] = tmp; /* entry in diag portion */
        }
      }
      rowcount++;
    }

    dlens[i]  = dcount;  /* d_nzz[i] */
    odlens[i] = odcount; /* o_nzz[i] */

    /* zero out the mask elements we set */
    for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
    for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
  }

  /* create our matrix with exact per-block-row preallocation */
  ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,type);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
  ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);

  if (!rank) {
    ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
    /* read in my part of the matrix numerical values  */
    nz = procsnz[0];
    vals = buf;
    mycols = ibuf;
    if (size == 1)  nz -= extra_rows;
    ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
    /* padding rows get 1.0 on the diagonal to keep the matrix nonsingular */
    if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }

    /* read in other processors (except the last one) and ship out */
    for (i=1; i<size-1; i++) {
      nz   = procsnz[i];
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
    }
    /* the last proc */
    if (size != 1){
      /* i == size-1 here: leftover value from the loop above */
      nz   = procsnz[i] - extra_rows;
      vals = buf;
      ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
      for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
      ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
    }
    ierr = PetscFree(procsnz);CHKERRQ(ierr);

  } else {
    /* receive numeric values */
    ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);

    /* receive message of values*/
    vals   = buf;
    mycols = ibuf;
    ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
    ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
    if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");

    /* insert into matrix */
    jj      = rstart*bs;
    for (i=0; i<m; i++) {
      ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
      mycols += locrowlens[i];
      vals   += locrowlens[i];
      jj++;
    }
  }

  ierr = PetscFree(locrowlens);CHKERRQ(ierr);
  ierr = PetscFree(buf);CHKERRQ(ierr);
  ierr = PetscFree(ibuf);CHKERRQ(ierr);
  ierr = PetscFree(rowners);CHKERRQ(ierr);
  ierr = PetscFree(dlens);CHKERRQ(ierr);
  ierr = PetscFree(mask);CHKERRQ(ierr);
  ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
  *newmat = A;
  PetscFunctionReturn(0);
}
2309 
2310 #undef __FUNCT__
2311 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2312 /*XXXXX@
2313    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2314 
2315    Input Parameters:
2316 .  mat  - the matrix
2317 .  fact - factor
2318 
2319    Collective on Mat
2320 
2321    Level: advanced
2322 
2323   Notes:
2324    This can also be set by the command line option: -mat_use_hash_table fact
2325 
2326 .keywords: matrix, hashtable, factor, HT
2327 
2328 .seealso: MatSetOption()
2329 @XXXXX*/
2330 
2331 
2332 #undef __FUNCT__
2333 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
     /*
        MatGetRowMax_MPISBAIJ - For each locally owned row, stores the maximum
        absolute value of that row's entries in the corresponding entry of v.

        Because SBAIJ stores only the upper triangle, an entry held here in the
        off-diagonal part B also belongs (by symmetry) to a row owned by a
        higher-ranked process.  The local pass therefore accumulates column maxima
        of B into a work array of full (global) length, and each process ships the
        relevant slices of that array to the processes that own those rows.
     */
2334 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2335 {
2336   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2337   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;   /* off-diagonal part, BAIJ format */
2338   PetscReal      atmp;
2339   PetscReal      *work,*svalues,*rvalues;
2340   PetscErrorCode ierr;
2341   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2342   PetscMPIInt    rank,size;
2343   PetscInt       *rowners_bs,dest,count,source;
2344   PetscScalar    *va;
2345   MatScalar      *ba;
2346   MPI_Status     stat;
2347 
2348   PetscFunctionBegin;
       /* row maxima of the diagonal block go straight into v */
2349   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2350   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2351 
2352   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2353   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2354 
2355   bs   = A->bs;
2356   mbs  = a->mbs;
2357   Mbs  = a->Mbs;
2358   ba   = b->a;      /* walked sequentially below; one entry per (krow,kcol,block) */
2359   bi   = b->i;
2360   bj   = b->j;
2361 
2362   /* find ownerships */
2363   rowners_bs = a->rowners_bs;
2364 
2365   /* each proc creates an array to be distributed */
2366   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2367   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2368 
2369   /* row_max for B */
2370   if (rank != size-1){
       /* the last process owns no columns to its right, hence no B contribution here */
2371     for (i=0; i<mbs; i++) {
2372       ncols = bi[1] - bi[0]; bi++;   /* number of block entries in block row i */
2373       brow  = bs*i;
2374       for (j=0; j<ncols; j++){
2375         bcol = bs*(*bj);
2376         for (kcol=0; kcol<bs; kcol++){
2377           col = bcol + kcol;                 /* local col index */
                 /* NOTE(review): assumes B's compact column numbering starts at the
                    first column past this process's diagonal block — confirm against
                    MatSetUpMultiply_MPISBAIJ */
2378           col += rowners_bs[rank+1];      /* global col index */
2379           for (krow=0; krow<bs; krow++){
2380             atmp = PetscAbsScalar(*ba); ba++;
2381             row = brow + krow;    /* local row index */
                   /* the entry contributes to its own row's max ... */
2382             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
                   /* ... and, by symmetry, to the max of the row owned elsewhere */
2383             if (work[col] < atmp) work[col] = atmp;
2384           }
2385         }
2386         bj++;
2387       }
2388     }
2389 
2390     /* send values to its owners */
2391     for (dest=rank+1; dest<size; dest++){
2392       svalues = work + rowners_bs[dest];
2393       count   = rowners_bs[dest+1]-rowners_bs[dest];
2394       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2395     }
2396   }
2397 
2398   /* receive values */
2399   if (rank){
       /* every lower-ranked process sends exactly this process's slice (same count),
          so rvalues can be reused as the receive buffer for each message; merge each
          incoming slice into v by taking the componentwise maximum */
2400     rvalues = work;
2401     count   = rowners_bs[rank+1]-rowners_bs[rank];
2402     for (source=0; source<rank; source++){
2403       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2404       /* process values */
2405       for (i=0; i<count; i++){
2406         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2407       }
2408     }
2409   }
2410 
2411   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2412   ierr = PetscFree(work);CHKERRQ(ierr);
2413   PetscFunctionReturn(0);
2414 }
2415 
2416 #undef __FUNCT__
2417 #define __FUNCT__ "MatRelax_MPISBAIJ"
     /*
        MatRelax_MPISBAIJ - SSOR relaxation for MPI SBAIJ matrices using the combined
        slvec0/slvec1 work vectors and the single scatter mat->sMvctx.

        Only SOR_LOCAL_SYMMETRIC_SWEEP with block size 1 is supported.  Each outer
        iteration assembles bb1 = bb - (off-diagonal part)*xx, then performs a local
        symmetric sweep on the diagonal block mat->A.

        Fix: the PetscMemcpy calls copy between Vec arrays, whose elements are
        PetscScalar; the byte counts previously used sizeof(MatScalar), which copies
        too few bytes when the matrix is stored in single precision
        (MatScalar != PetscScalar; see the note at the top of this file).
     */
2418 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2419 {
2420   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2421   PetscErrorCode ierr;
2422   PetscInt       mbs=mat->mbs,bs=matin->bs;
2423   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2424   Vec            bb1;
2425 
2426   PetscFunctionBegin;
2427   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2428   if (bs > 1)
2429     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2430 
2431   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2432     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
       /* zero initial guess: the off-diagonal contribution vanishes, so the first
          iteration is just a local sweep with the original right-hand side */
2433       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2434       its--;
2435     }
2436 
2437     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2438     while (its--){
2439 
2440       /* lower triangular part: slvec0b = - B^T*xx */
2441       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2442 
2443       /* copy xx into slvec0a; sized with sizeof(PetscScalar) since Vec arrays
          hold PetscScalar entries (sizeof(MatScalar) would be wrong when the
          matrix uses single-precision storage) */
2444       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2445       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2446       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2447       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2448 
2449       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2450 
2451       /* copy bb into slvec1a */
2452       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2453       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2454       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2455       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2456 
2457       /* set slvec1b = 0 */
2458       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2459 
       /* the scatter begin/end pair is split so the VecRestoreArray calls can
          overlap the communication; do not reorder these calls */
2460       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2461       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2462       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2463       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2464 
2465       /* upper triangular part: bb1 = bb1 - B*x */
2466       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2467 
2468       /* local diagonal sweep */
2469       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2470     }
2471     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2472   } else {
2473     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2474   }
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
     /*
        MatRelax_MPISBAIJ_2comm - Variant of MatRelax_MPISBAIJ that uses the
        conventional two-scatter communication scheme (mat->lvec with mat->Mvctx:
        one forward and one reverse scatter per iteration) instead of the combined
        slvec0/slvec1 scatter.

        Only SOR_LOCAL_SYMMETRIC_SWEEP with block size 1 is supported.  Each
        iteration builds bb1 = bb - (off-diagonal part)*xx and then performs a
        local symmetric sweep on the diagonal block mat->A.
     */
2480 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2481 {
2482   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2483   PetscErrorCode ierr;
2484   PetscScalar    mone=-1.0;
2485   Vec            lvec1,bb1;
2486 
2487   PetscFunctionBegin;
2488   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2489   if (matin->bs > 1)
2490     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2491 
2492   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2493     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
       /* zero initial guess: the off-diagonal contribution vanishes, so the first
          iteration is just a local sweep with the original right-hand side */
2494       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2495       its--;
2496     }
2497 
2498     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2499     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2500     while (its--){
       /* scatter begin/end pairs are deliberately split so the local products
          overlap the communication; do not reorder these calls */
2501       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2502 
2503       /* lower diagonal part: bb1 = bb - B^T*xx */
2504       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2505       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2506 
2507       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2508       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
       /* reverse scatter adds the -B^T*xx contributions into their owners' bb1 */
2509       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2510 
2511       /* upper diagonal part: bb1 = bb1 - B*x */
2512       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2513       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2514 
2515       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2516 
2517       /* diagonal sweep */
2518       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2519     }
2520     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2521     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2522   } else {
2523     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2524   }
2525   PetscFunctionReturn(0);
2526 }
2527 
2528