xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 9af31e4ad595286b4e2df8194fee047feeccfe42)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 extern int MatSetUpMultiply_MPISBAIJ(Mat);
7 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 extern int DisAssemble_MPISBAIJ(Mat);
9 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
10 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
11 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
21 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements already in single precision and
26    inserts them into the single precision data structures. The function MatSetValuesBlocked_MPISBAIJ()
27    converts the entries to single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 int MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   int          ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 int MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   int          ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
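/*
   A minimal caller-side sketch (standard PETSc usage, not code from this file) of how
   the store/retrieve pair above is typically used when a matrix with a fixed nonzero
   pattern is refilled many times; MatStoreValues() requires that no new nonzero
   locations be created:

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);      (save the current numerical values)
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);   (later, restore them before refilling)
*/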
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (int*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
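/*
   For reference, the search step of the macro above is equivalent to the following
   plain function (an illustrative sketch only; find_block_col() is hypothetical and
   the reallocation/insertion logic is omitted).  The range is narrowed by binary
   search until at most 3 candidates remain, then scanned linearly:

     static int find_block_col(const int *rp,int nrow,int bcol)
     {
       int low = 0,high = nrow,t,i;
       while (high-low > 3) {
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {
         if (rp[i] >= bcol) break;
       }
       return i;   (index of the match if rp[i] == bcol, otherwise the insertion point)
     }
*/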
153 #ifndef MatSetValues_SeqSBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (int*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
237   int          ierr,i,N = m*n;
238   MatScalar    *vsingle;
239 
240   PetscFunctionBegin;
241   if (N > b->setvalueslen) {
242     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
243     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
244     b->setvalueslen  = N;
245   }
246   vsingle = b->setvaluescopy;
247 
248   for (i=0; i<N; i++) {
249     vsingle[i] = v[i];
250   }
251   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
258 {
259   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
260   int          ierr,i,N = m*n*b->bs2;
261   MatScalar    *vsingle;
262 
263   PetscFunctionBegin;
264   if (N > b->setvalueslen) {
265     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
266     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
267     b->setvalueslen  = N;
268   }
269   vsingle = b->setvaluescopy;
270   for (i=0; i<N; i++) {
271     vsingle[i] = v[i];
272   }
273   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
279 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
282   int          ierr,i,N = m*n;
283   MatScalar    *vsingle;
284 
285   PetscFunctionBegin;
286   SETERRQ(1,"Function not yet written for SBAIJ format");
287   /* PetscFunctionReturn(0); */
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
292 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
293 {
294   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
295   int          ierr,i,N = m*n*b->bs2;
296   MatScalar    *vsingle;
297 
298   PetscFunctionBegin;
299   SETERRQ(1,"Function not yet written for SBAIJ format");
300   /* PetscFunctionReturn(0); */
301 }
302 #endif
303 
304 /* Only add/insert a(i,j) with i<=j (blocks).
305    Any a(i,j) with i>j input by the user is ignored.
306 */
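/*
   For example (illustrative, with bs=1): MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES)
   with i=3, j=1 is silently dropped below, since block (3,1) lies in the lower
   triangle; the caller is expected to supply only entries with i<=j.
*/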
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
309 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
310 {
311   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
312   MatScalar    value;
313   PetscTruth   roworiented = baij->roworiented;
314   int          ierr,i,j,row,col;
315   int          rstart_orig=baij->rstart_bs;
316   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
317   int          cend_orig=baij->cend_bs,bs=baij->bs;
318 
319   /* Some Variables required in the macro */
320   Mat          A = baij->A;
321   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
322   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
323   MatScalar    *aa=a->a;
324 
325   Mat          B = baij->B;
326   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
327   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
328   MatScalar    *ba=b->a;
329 
330   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
331   int          low,high,t,ridx,cidx,bs2=a->bs2;
332   MatScalar    *ap,*bap;
333 
334   /* for stash */
335   int          n_loc, *in_loc=0;
336   MatScalar    *v_loc=0;
337 
338   PetscFunctionBegin;
339 
340   if (!baij->donotstash) {
341     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
342     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
343   }
344 
345   for (i=0; i<m; i++) {
346     if (im[i] < 0) continue;
347 #if defined(PETSC_USE_BOPT_g)
348     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
349 #endif
350     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
351       row = im[i] - rstart_orig;              /* local row index */
352       for (j=0; j<n; j++) {
353         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
354         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
355           col = in[j] - cstart_orig;          /* local col index */
356           brow = row/bs; bcol = col/bs;
357           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
358           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
359           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
360           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
361         } else if (in[j] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
364 #endif
365         else {  /* off-diag entry (B) */
366           if (mat->was_assembled) {
367             if (!baij->colmap) {
368               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
369             }
370 #if defined (PETSC_USE_CTABLE)
371             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
372             col  = col - 1;
373 #else
374             col = baij->colmap[in[j]/bs] - 1;
375 #endif
376             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
377               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
378               col =  in[j];
379               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
380               B = baij->B;
381               b = (Mat_SeqBAIJ*)(B)->data;
382               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
383               ba=b->a;
384             } else col += in[j]%bs;
385           } else col = in[j];
386           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
387           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
388           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
389         }
390       }
391     } else {  /* off processor entry */
392       if (!baij->donotstash) {
393         n_loc = 0;
394         for (j=0; j<n; j++){
395           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
396           in_loc[n_loc] = in[j];
397           if (roworiented) {
398             v_loc[n_loc] = v[i*n+j];
399           } else {
400             v_loc[n_loc] = v[j*m+i];
401           }
402           n_loc++;
403         }
404         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
405       }
406     }
407   }
408 
409   if (!baij->donotstash) {
410     ierr = PetscFree(in_loc);CHKERRQ(ierr);
411     ierr = PetscFree(v_loc);CHKERRQ(ierr);
412   }
413   PetscFunctionReturn(0);
414 }
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
418 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
419 {
420   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
421   const MatScalar *value;
422   MatScalar       *barray=baij->barray;
423   PetscTruth      roworiented = baij->roworiented;
424   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
425   int             rend=baij->rend,cstart=baij->cstart,stepval;
426   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
427 
428   PetscFunctionBegin;
429   if (!barray) {
430     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
431     baij->barray = barray;
432   }
433 
434   if (roworiented) {
435     stepval = (n-1)*bs;
436   } else {
437     stepval = (m-1)*bs;
438   }
439   for (i=0; i<m; i++) {
440     if (im[i] < 0) continue;
441 #if defined(PETSC_USE_BOPT_g)
442     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
443 #endif
444     if (im[i] >= rstart && im[i] < rend) {
445       row = im[i] - rstart;
446       for (j=0; j<n; j++) {
447         /* If NumCol = 1 then a copy is not required */
448         if ((roworiented) && (n == 1)) {
449           barray = (MatScalar*) v + i*bs2;
450         } else if((!roworiented) && (m == 1)) {
451           barray = (MatScalar*) v + j*bs2;
452         } else { /* Here a copy is required */
453           if (roworiented) {
454             value = v + i*(stepval+bs)*bs + j*bs;
455           } else {
456             value = v + j*(stepval+bs)*bs + i*bs;
457           }
458           for (ii=0; ii<bs; ii++,value+=stepval) {
459             for (jj=0; jj<bs; jj++) {
460               *barray++  = *value++;
461             }
462           }
463           barray -=bs2;
464         }
465 
466         if (in[j] >= cstart && in[j] < cend){
467           col  = in[j] - cstart;
468           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
469         }
470         else if (in[j] < 0) continue;
471 #if defined(PETSC_USE_BOPT_g)
472         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
473 #endif
474         else {
475           if (mat->was_assembled) {
476             if (!baij->colmap) {
477               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
478             }
479 
480 #if defined(PETSC_USE_BOPT_g)
481 #if defined (PETSC_USE_CTABLE)
482             { int data;
483               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
484               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
485             }
486 #else
487             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
488 #endif
489 #endif
490 #if defined (PETSC_USE_CTABLE)
491             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
492             col  = (col - 1)/bs;
493 #else
494             col = (baij->colmap[in[j]] - 1)/bs;
495 #endif
496             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
497               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
498               col =  in[j];
499             }
500           }
501           else col = in[j];
502           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
503         }
504       }
505     } else {
506       if (!baij->donotstash) {
507         if (roworiented) {
508           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
509         } else {
510           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
511         }
512       }
513     }
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 #define HASH_KEY 0.6180339887
519 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
520 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
521 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
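/*
   Worked example of the HASH macro (illustrative): with size=11 and key=7,
   tmp = 7*0.6180339887 = 4.3262..., and HASH returns (int)(11*0.3262...) = 3.
   Multiplying by the golden-ratio constant and keeping only the fractional part
   is Knuth's multiplicative hashing, which spreads consecutive keys nearly
   uniformly over [0,size).
*/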
522 #undef __FUNCT__
523 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
524 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
525 {
526   PetscFunctionBegin;
527   SETERRQ(1,"Function not yet written for SBAIJ format");
528   /* PetscFunctionReturn(0); */
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
533 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
534 {
535   PetscFunctionBegin;
536   SETERRQ(1,"Function not yet written for SBAIJ format");
537   /* PetscFunctionReturn(0); */
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatGetValues_MPISBAIJ"
542 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
543 {
544   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
545   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
546   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
547 
548   PetscFunctionBegin;
549   for (i=0; i<m; i++) {
550     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
551     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
552     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
553       row = idxm[i] - bsrstart;
554       for (j=0; j<n; j++) {
555         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
556         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
557         if (idxn[j] >= bscstart && idxn[j] < bscend){
558           col = idxn[j] - bscstart;
559           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
560         } else {
561           if (!baij->colmap) {
562             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
563           }
564 #if defined (PETSC_USE_CTABLE)
565           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
566           data--;
567 #else
568           data = baij->colmap[idxn[j]/bs]-1;
569 #endif
570           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
571           else {
572             col  = data + idxn[j]%bs;
573             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
574           }
575         }
576       }
577     } else {
578       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
579     }
580   }
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatNorm_MPISBAIJ"
586 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
587 {
588   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
589   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
590   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
591   int        ierr;
592   PetscReal  sum[2],*lnorm2;
593 
594   PetscFunctionBegin;
595   if (baij->size == 1) {
596     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
597   } else {
598     if (type == NORM_FROBENIUS) {
599       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
600       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
601       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
602       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
603       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
604       /*
605       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
606       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
607       */
608       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
609       /*
610       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
611       PetscSynchronizedFlush(mat->comm); */
612 
613       *norm = sqrt(sum[0] + 2*sum[1]);
614       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
615     } else {
616       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
617     }
618   }
619   PetscFunctionReturn(0);
620 }
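/*
   Why sqrt(sum[0] + 2*sum[1]) above: only the upper triangle of the matrix is
   stored, so each off-diagonal block B stands for both B and B^T in the full
   matrix.  With the per-processor squares summed by the MPI_Allreduce,

       ||mat||_F^2 = sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 )
*/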
621 
622 /*
623   Creates and fills the hash table.
624   This table is created only once.
625   If new entries need to be added to the matrix
626   then the hash table has to be destroyed and
627   recreated.
628 */
629 #undef __FUNCT__
630 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
631 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
632 {
633   PetscFunctionBegin;
634   SETERRQ(1,"Function not yet written for SBAIJ format");
635   /* PetscFunctionReturn(0); */
636 }
637 
638 #undef __FUNCT__
639 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
640 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
641 {
642   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
643   int         ierr,nstash,reallocs;
644   InsertMode  addv;
645 
646   PetscFunctionBegin;
647   if (baij->donotstash) {
648     PetscFunctionReturn(0);
649   }
650 
651   /* make sure all processors are either in INSERTMODE or ADDMODE */
652   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
653   if (addv == (ADD_VALUES|INSERT_VALUES)) {
654     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
655   }
656   mat->insertmode = addv; /* in case this processor had no cache */
657 
658   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
659   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
660   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
661   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
662   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
663   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
669 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
670 {
671   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
672   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
673   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
674   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
675   int         *row,*col,other_disassembled;
676   PetscTruth  r1,r2,r3;
677   MatScalar   *val;
678   InsertMode  addv = mat->insertmode;
679 
680   PetscFunctionBegin;
681 
682   if (!baij->donotstash) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       /*
686       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
687       PetscSynchronizedFlush(mat->comm);
688       */
689       if (!flg) break;
690 
691       for (i=0; i<n;) {
692         /* Now identify the consecutive vals belonging to the same row */
693         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
694         if (j < n) ncols = j-i;
695         else       ncols = n-i;
696         /* Now assemble all these values with a single function call */
697         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702     /* Now process the block-stash. Since the values are stashed column-oriented,
703        set the roworiented flags to PETSC_FALSE (column-oriented), and after the
704        MatSetValues() calls restore the original flags */
705     r1 = baij->roworiented;
706     r2 = a->roworiented;
707     r3 = b->roworiented;
708     baij->roworiented = PETSC_FALSE;
709     a->roworiented    = PETSC_FALSE;
710     b->roworiented    = PETSC_FALSE;
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
718         if (j < n) ncols = j-i;
719         else       ncols = n-i;
720         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
721         i = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
725     baij->roworiented = r1;
726     a->roworiented    = r2;
727     b->roworiented    = r3;
728   }
729 
730   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
731   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
732 
733   /* determine if any processor has disassembled; if so we must
734      also disassemble ourselves, in order that we may reassemble. */
735   /*
736      if the nonzero structure of submatrix B cannot change then we know that
737      no processor disassembled, thus we can skip this step
738   */
739   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
740     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
741     if (mat->was_assembled && !other_disassembled) {
742       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
743     }
744   }
745 
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
748   }
749   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
750   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
751 
752 #if defined(PETSC_USE_BOPT_g)
753   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
754     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
755     baij->ht_total_ct  = 0;
756     baij->ht_insert_ct = 0;
757   }
758 #endif
759   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
760     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
761     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
762     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
763   }
764 
765   if (baij->rowvalues) {
766     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
767     baij->rowvalues = 0;
768   }
769 
770   PetscFunctionReturn(0);
771 }
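/*
   Caller-side sketch of the two-phase assembly handled above (standard PETSc usage,
   not code from this file); values set for rows owned by another processor are
   stashed and only communicated during assembly:

     ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/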
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
775 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
776 {
777   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
778   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
779   PetscTruth        iascii,isdraw;
780   PetscViewer       sviewer;
781   PetscViewerFormat format;
782 
783   PetscFunctionBegin;
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
786   if (iascii) {
787     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
788     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
789       MatInfo info;
790       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
791       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
793               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
794               baij->bs,(int)info.memory);CHKERRQ(ierr);
795       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
796       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
797       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
798       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
799       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
800       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
801       PetscFunctionReturn(0);
802     } else if (format == PETSC_VIEWER_ASCII_INFO) {
803       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw       draw;
810     PetscTruth isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
813   }
814 
815   if (size == 1) {
816     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
817     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
818   } else {
819     /* assemble the entire matrix onto first processor. */
820     Mat         A;
821     Mat_SeqSBAIJ *Aloc;
822     Mat_SeqBAIJ *Bloc;
823     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
824     MatScalar   *a;
825 
826     /* Should this be the same type as mat? */
827     if (!rank) {
828       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
829     } else {
830       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
831     }
832     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
833     ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
834     PetscLogObjectParent(mat,A);
835 
836     /* copy over the A part */
837     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
838     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
839     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
840 
841     for (i=0; i<mbs; i++) {
842       rvals[0] = bs*(baij->rstart + i);
843       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
844       for (j=ai[i]; j<ai[i+1]; j++) {
845         col = (baij->cstart+aj[j])*bs;
846         for (k=0; k<bs; k++) {
847           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
848           col++; a += bs;
849         }
850       }
851     }
852     /* copy over the B part */
853     Bloc = (Mat_SeqBAIJ*)baij->B->data;
854     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
855     for (i=0; i<mbs; i++) {
856       rvals[0] = bs*(baij->rstart + i);
857       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
858       for (j=ai[i]; j<ai[i+1]; j++) {
859         col = baij->garray[aj[j]]*bs;
860         for (k=0; k<bs; k++) {
861           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
862           col++; a += bs;
863         }
864       }
865     }
866     ierr = PetscFree(rvals);CHKERRQ(ierr);
867     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
868     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
869     /*
870        Everyone has to call to draw the matrix since the graphics waits are
871        synchronized across all processors that share the PetscDraw object
872     */
873     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
874     if (!rank) {
875       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
876       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
877     }
878     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
879     ierr = MatDestroy(A);CHKERRQ(ierr);
880   }
881   PetscFunctionReturn(0);
882 }
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatView_MPISBAIJ"
886 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
887 {
888   int        ierr;
889   PetscTruth iascii,isdraw,issocket,isbinary;
890 
891   PetscFunctionBegin;
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
894   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
895   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
896   if (iascii || isdraw || issocket || isbinary) {
897     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
898   } else {
899     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
900   }
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatDestroy_MPISBAIJ"
906 int MatDestroy_MPISBAIJ(Mat mat)
907 {
908   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
909   int         ierr;
910 
911   PetscFunctionBegin;
912 #if defined(PETSC_USE_LOG)
913   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
914 #endif
915   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
916   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
917   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
918   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
919   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
920 #if defined (PETSC_USE_CTABLE)
921   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
922 #else
923   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
924 #endif
925   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
926   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
927   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
928   if (baij->slvec0) {
929     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
930     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
931   }
932   if (baij->slvec1) {
933     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
934     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
935     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
936   }
937   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
938   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
939   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
940   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
941 #if defined(PETSC_USE_MAT_SINGLE)
942   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
943 #endif
944   ierr = PetscFree(baij);CHKERRQ(ierr);
945   PetscFunctionReturn(0);
946 }
947 
948 #undef __FUNCT__
949 #define __FUNCT__ "MatMult_MPISBAIJ"
950 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
951 {
952   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
953   int         ierr,nt,mbs=a->mbs,bs=a->bs;
954   PetscScalar *x,*from,zero=0.0;
955 
956   PetscFunctionBegin;
957   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
958   if (nt != A->n) {
959     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
960   }
961   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
962   if (nt != A->m) {
963     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
964   }
965 
966   /* diagonal part */
967   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
968   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
969 
970   /* subdiagonal part */
971   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
972 
973   /* copy x into the vec slvec0 */
974   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
975   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
976   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
977   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
978 
979   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
980   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
981   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
982 
983   /* superdiagonal part */
984   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
985 
986   PetscFunctionReturn(0);
987 }
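/*
   A sketch of the scheme above, as read from the code (description only):
   slvec0 holds [ x_local ; B^T x_local ] and slvec1 holds [ A x_local ; 0 ].
   The single ADD_VALUES scatter slvec0 -> slvec1 simultaneously accumulates the
   subdiagonal contributions B^T x into the owning processors' slvec1a entries and
   gathers the off-processor x values into slvec1b; the final multadd then applies
   the superdiagonal block B to slvec1b and adds slvec1a, so one symmetric multiply
   costs only a single round of communication.
*/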
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
991 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
992 {
993   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
994   int         ierr,nt;
995 
996   PetscFunctionBegin;
997   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
998   if (nt != A->n) {
999     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1000   }
1001   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1002   if (nt != A->m) {
1003     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1004   }
1005 
1006   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1007   /* do diagonal part */
1008   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1009   /* do superdiagonal part */
1010   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1011   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1012   /* do subdiagonal part */
1013   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1014   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1016 
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1022 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1023 {
1024   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1025   int          ierr,mbs=a->mbs,bs=a->bs;
1026   PetscScalar  *x,*from,zero=0.0;
1027 
1028   PetscFunctionBegin;
1029   /*
1030   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1031   PetscSynchronizedFlush(A->comm);
1032   */
1033   /* diagonal part */
1034   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1035   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1036 
1037   /* subdiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1039 
1040   /* copy x into the vec slvec0 */
1041   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1042   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1043   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1044   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1045 
1046   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1047   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1048   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1049 
1050   /* superdiagonal part */
1051   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1052 
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1058 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1059 {
1060   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1061   int        ierr;
1062 
1063   PetscFunctionBegin;
1064   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1065   /* do diagonal part */
1066   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1067   /* do superdiagonal part */
1068   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1069   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1070 
1071   /* do subdiagonal part */
1072   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1073   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1074   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1075 
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1081 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1082 {
1083   int ierr;
1084 
1085   PetscFunctionBegin;
1086   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1092 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1093 {
1094   int ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
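/*
   Since the matrix is symmetric (A^T = A), the two transpose operations above can
   simply reuse MatMult() and MatMultAdd().
*/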
1100 
1101 /*
1102   This only works correctly for square matrices where the subblock A->A is the
1103    diagonal block
1104 */
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1107 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1108 {
1109   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1110   int         ierr;
1111 
1112   PetscFunctionBegin;
1113   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1114   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatScale_MPISBAIJ"
1120 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1121 {
1122   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1123   int         ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1127   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1133 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1134 {
1135   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1136   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1137   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1138   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1139   int            *cmap,*idx_p,cstart = mat->cstart;
1140 
1141   PetscFunctionBegin;
1142   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1143   mat->getrowactive = PETSC_TRUE;
1144 
1145   if (!mat->rowvalues && (idx || v)) {
1146     /*
1147         allocate enough space to hold information from the longest row.
1148     */
1149     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1150     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1151     int     max = 1,mbs = mat->mbs,tmp;
1152     for (i=0; i<mbs; i++) {
1153       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1154       if (max < tmp) { max = tmp; }
1155     }
1156     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1157     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1158   }
1159 
1160   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1161   lrow = row - brstart;  /* local row index */
1162 
1163   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1164   if (!v)   {pvA = 0; pvB = 0;}
1165   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1166   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1167   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1168   nztot = nzA + nzB;
1169 
1170   cmap  = mat->garray;
1171   if (v  || idx) {
1172     if (nztot) {
1173       /* Sort by increasing column numbers, assuming A and B already sorted */
1174       int imark = -1;
1175       if (v) {
1176         *v = v_p = mat->rowvalues;
1177         for (i=0; i<nzB; i++) {
1178           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1179           else break;
1180         }
1181         imark = i;
1182         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1183         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1184       }
1185       if (idx) {
1186         *idx = idx_p = mat->rowindices;
1187         if (imark > -1) {
1188           for (i=0; i<imark; i++) {
1189             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1190           }
1191         } else {
1192           for (i=0; i<nzB; i++) {
1193             if (cmap[cworkB[i]/bs] < cstart)
1194               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1195             else break;
1196           }
1197           imark = i;
1198         }
1199         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1200         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1201       }
1202     } else {
1203       if (idx) *idx = 0;
1204       if (v)   *v   = 0;
1205     }
1206   }
1207   *nz = nztot;
1208   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1209   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1215 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1216 {
1217   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1218 
1219   PetscFunctionBegin;
1220   if (baij->getrowactive == PETSC_FALSE) {
1221     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1222   }
1223   baij->getrowactive = PETSC_FALSE;
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1229 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1230 {
1231   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1232 
1233   PetscFunctionBegin;
1234   *bs = baij->bs;
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1240 int MatZeroEntries_MPISBAIJ(Mat A)
1241 {
1242   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1243   int         ierr;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1247   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1253 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1254 {
1255   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1256   Mat         A = a->A,B = a->B;
1257   int         ierr;
1258   PetscReal   isend[5],irecv[5];
1259 
1260   PetscFunctionBegin;
1261   info->block_size     = (PetscReal)a->bs;
1262   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1263   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1264   isend[3] = info->memory;  isend[4] = info->mallocs;
1265   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1266   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1267   isend[3] += info->memory;  isend[4] += info->mallocs;
1268   if (flag == MAT_LOCAL) {
1269     info->nz_used      = isend[0];
1270     info->nz_allocated = isend[1];
1271     info->nz_unneeded  = isend[2];
1272     info->memory       = isend[3];
1273     info->mallocs      = isend[4];
1274   } else if (flag == MAT_GLOBAL_MAX) {
1275     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1276     info->nz_used      = irecv[0];
1277     info->nz_allocated = irecv[1];
1278     info->nz_unneeded  = irecv[2];
1279     info->memory       = irecv[3];
1280     info->mallocs      = irecv[4];
1281   } else if (flag == MAT_GLOBAL_SUM) {
1282     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1283     info->nz_used      = irecv[0];
1284     info->nz_allocated = irecv[1];
1285     info->nz_unneeded  = irecv[2];
1286     info->memory       = irecv[3];
1287     info->mallocs      = irecv[4];
1288   } else {
1289     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1290   }
1291   info->rows_global       = (PetscReal)matin->M;
1292   info->columns_global    = (PetscReal)matin->N;
1293   info->rows_local        = (PetscReal)matin->m;
1294   info->columns_local     = (PetscReal)matin->n;
1295   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1296   info->fill_ratio_needed = 0;
1297   info->factor_mallocs    = 0;
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1303 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1304 {
1305   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1306   int         ierr;
1307 
1308   PetscFunctionBegin;
1309   switch (op) {
1310   case MAT_NO_NEW_NONZERO_LOCATIONS:
1311   case MAT_YES_NEW_NONZERO_LOCATIONS:
1312   case MAT_COLUMNS_UNSORTED:
1313   case MAT_COLUMNS_SORTED:
1314   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1315   case MAT_KEEP_ZEROED_ROWS:
1316   case MAT_NEW_NONZERO_LOCATION_ERR:
1317     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1318     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1319     break;
1320   case MAT_ROW_ORIENTED:
1321     a->roworiented = PETSC_TRUE;
1322     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1323     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1324     break;
1325   case MAT_ROWS_SORTED:
1326   case MAT_ROWS_UNSORTED:
1327   case MAT_YES_NEW_DIAGONALS:
1328     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1329     break;
1330   case MAT_COLUMN_ORIENTED:
1331     a->roworiented = PETSC_FALSE;
1332     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1333     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1334     break;
1335   case MAT_IGNORE_OFF_PROC_ENTRIES:
1336     a->donotstash = PETSC_TRUE;
1337     break;
1338   case MAT_NO_NEW_DIAGONALS:
1339     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1340   case MAT_USE_HASH_TABLE:
1341     a->ht_flag = PETSC_TRUE;
1342     break;
1343   case MAT_NOT_SYMMETRIC:
1344   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1345   case MAT_HERMITIAN:
1346     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1347   case MAT_SYMMETRIC:
1348   case MAT_STRUCTURALLY_SYMMETRIC:
1349   case MAT_NOT_HERMITIAN:
1350   case MAT_SYMMETRY_ETERNAL:
1351   case MAT_NOT_SYMMETRY_ETERNAL:
1352     break;
1353   default:
1354     SETERRQ(PETSC_ERR_SUP,"unknown option");
1355   }
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1361 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1362 {
1363   int ierr;
1364   PetscFunctionBegin;
1365   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1371 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1372 {
1373   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1374   Mat         a = baij->A,b = baij->B;
1375   int         ierr,s1,s2,s3;
1376 
1377   PetscFunctionBegin;
1378   if (ll != rr) {
1379     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1380   }
1381   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1382   if (rr) {
1383     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1384     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1385     /* Overlap communication with computation. */
1386     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1387     /* left scale the off-diagonal block while the scatter is in flight */
1388     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1389     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1390     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1391 
1392     /* scale the diagonal block */
1393     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1394 
1396     /* Do a scatter end and then right scale the off-diagonal block */
1397     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1398     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1399   }
1400 
1401   PetscFunctionReturn(0);
1402 }
1403 
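/*
   Example (editor's sketch): symmetric scaling D*A*D of an MPISBAIJ matrix A;
   the format requires the same vector d on both sides.

     ierr = MatDiagonalScale(A,d,d);CHKERRQ(ierr);
*/
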
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1406 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1407 {
1408   PetscFunctionBegin;
1409   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1414 int MatPrintHelp_MPISBAIJ(Mat A)
1415 {
1416   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1417   MPI_Comm    comm = A->comm;
1418   static int  called = 0;
1419   int         ierr;
1420 
1421   PetscFunctionBegin;
1422   if (!a->rank) {
1423     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1424   }
1425   if (called) {PetscFunctionReturn(0);} else called = 1;
1426   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1427   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1433 int MatSetUnfactored_MPISBAIJ(Mat A)
1434 {
1435   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1436   int         ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatEqual_MPISBAIJ"
1447 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1448 {
1449   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1450   Mat         a,b,c,d;
1451   PetscTruth  flg;
1452   int         ierr;
1453 
1454   PetscFunctionBegin;
1455   a = matA->A; b = matA->B;
1456   c = matB->A; d = matB->B;
1457 
1458   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1459   if (flg == PETSC_TRUE) {
1460     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1461   }
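  /* both the diagonal and off-diagonal parts must match on every process */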
1462   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1468 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1469 {
1470   int        ierr;
1471 
1472   PetscFunctionBegin;
1473   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1479 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1480 {
1481   int        i,ierr;
1482   PetscTruth flg;
1483 
  PetscFunctionBegin;
1484   for (i=0; i<n; i++) {
1485     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1486     if (!flg) {
1487       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1488     }
1489   }
1490   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
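/*
   Example (editor's sketch): extracting one symmetric principal submatrix;
   irow and icol must be equal for this format, so the same IS is passed twice.

     IS  is;
     Mat *submat;
     ierr = ISCreateStride(PETSC_COMM_SELF,5,0,1,&is);CHKERRQ(ierr);
     ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/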
1494 
1495 /* -------------------------------------------------------------------*/
1496 static struct _MatOps MatOps_Values = {
1497        MatSetValues_MPISBAIJ,
1498        MatGetRow_MPISBAIJ,
1499        MatRestoreRow_MPISBAIJ,
1500        MatMult_MPISBAIJ,
1501 /* 4*/ MatMultAdd_MPISBAIJ,
1502        MatMultTranspose_MPISBAIJ,
1503        MatMultTransposeAdd_MPISBAIJ,
1504        0,
1505        0,
1506        0,
1507 /*10*/ 0,
1508        0,
1509        0,
1510        MatRelax_MPISBAIJ,
1511        MatTranspose_MPISBAIJ,
1512 /*15*/ MatGetInfo_MPISBAIJ,
1513        MatEqual_MPISBAIJ,
1514        MatGetDiagonal_MPISBAIJ,
1515        MatDiagonalScale_MPISBAIJ,
1516        MatNorm_MPISBAIJ,
1517 /*20*/ MatAssemblyBegin_MPISBAIJ,
1518        MatAssemblyEnd_MPISBAIJ,
1519        0,
1520        MatSetOption_MPISBAIJ,
1521        MatZeroEntries_MPISBAIJ,
1522 /*25*/ MatZeroRows_MPISBAIJ,
1523        0,
1524        0,
1525        0,
1526        0,
1527 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*35*/ MatDuplicate_MPISBAIJ,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*40*/ 0,
1538        MatGetSubMatrices_MPISBAIJ,
1539        MatIncreaseOverlap_MPISBAIJ,
1540        MatGetValues_MPISBAIJ,
1541        0,
1542 /*45*/ MatPrintHelp_MPISBAIJ,
1543        MatScale_MPISBAIJ,
1544        0,
1545        0,
1546        0,
1547 /*50*/ MatGetBlockSize_MPISBAIJ,
1548        0,
1549        0,
1550        0,
1551        0,
1552 /*55*/ 0,
1553        0,
1554        MatSetUnfactored_MPISBAIJ,
1555        0,
1556        MatSetValuesBlocked_MPISBAIJ,
1557 /*60*/ 0,
1558        0,
1559        0,
1560        MatGetPetscMaps_Petsc,
1561        0,
1562 /*65*/ 0,
1563        0,
1564        0,
1565        0,
1566        0,
1567 /*70*/ MatGetRowMax_MPISBAIJ,
1568        0,
1569        0,
1570        0,
1571        0,
1572 /*75*/ 0,
1573        0,
1574        0,
1575        0,
1576        0,
1577 /*80*/ 0,
1578        0,
1579        0,
1580        0,
1581 /*85*/ MatLoad_MPISBAIJ
1582 };
1583 
1584 
1585 EXTERN_C_BEGIN
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1588 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1589 {
1590   PetscFunctionBegin;
1591   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1592   *iscopy = PETSC_FALSE;
1593   PetscFunctionReturn(0);
1594 }
1595 EXTERN_C_END
1596 
1597 EXTERN_C_BEGIN
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1600 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1601 {
1602   Mat_MPISBAIJ *b;
1603   int          ierr,i,mbs,Mbs;
1604 
1605   PetscFunctionBegin;
1606   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1607 
1608   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1609   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1610   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1611   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1612   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1613   if (d_nnz) {
1614     for (i=0; i<B->m/bs; i++) {
1615       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1616     }
1617   }
1618   if (o_nnz) {
1619     for (i=0; i<B->m/bs; i++) {
1620       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1621     }
1622   }
1623   B->preallocated = PETSC_TRUE;
1624   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1625   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1626   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1627   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1628 
1629   b   = (Mat_MPISBAIJ*)B->data;
1630   mbs = B->m/bs;
1631   Mbs = B->M/bs;
1632   if (mbs*bs != B->m) {
1633     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by the blocksize %d",B->m,bs);
1634   }
1635 
1636   b->bs  = bs;
1637   b->bs2 = bs*bs;
1638   b->mbs = mbs;
1639   b->nbs = mbs;
1640   b->Mbs = Mbs;
1641   b->Nbs = Mbs;
1642 
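  /* gather every process's block-row count, then prefix-sum so that
     rowners[p]..rowners[p+1] is the block-row range owned by process p */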
1643   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1644   b->rowners[0]    = 0;
1645   for (i=2; i<=b->size; i++) {
1646     b->rowners[i] += b->rowners[i-1];
1647   }
1648   b->rstart    = b->rowners[b->rank];
1649   b->rend      = b->rowners[b->rank+1];
1650   b->cstart    = b->rstart;
1651   b->cend      = b->rend;
1652   for (i=0; i<=b->size; i++) {
1653     b->rowners_bs[i] = b->rowners[i]*bs;
1654   }
1655   b->rstart_bs = b-> rstart*bs;
1656   b->rend_bs   = b->rend*bs;
1657 
1658   b->cstart_bs = b->cstart*bs;
1659   b->cend_bs   = b->cend*bs;
1660 
1661   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1662   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1663   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1664   PetscLogObjectParent(B,b->A);
1665 
1666   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1667   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1668   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1669   PetscLogObjectParent(B,b->B);
1670 
1671   /* build cache for off array entries formed */
1672   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1673 
1674   PetscFunctionReturn(0);
1675 }
1676 EXTERN_C_END
1677 
1678 /*MC
1679    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1680    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1681 
1682    Options Database Keys:
1683 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1684 
1685   Level: beginner
1686 
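   Example (editor's sketch; A, comm and the sizes are assumed to be set up by
   the caller):
.vb
   ierr = MatCreate(comm,m,m,M,M,&A);
   ierr = MatSetType(A,MATMPISBAIJ);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve
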
1687 .seealso: MatCreateMPISBAIJ
1688 M*/
1689 
1690 EXTERN_C_BEGIN
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatCreate_MPISBAIJ"
1693 int MatCreate_MPISBAIJ(Mat B)
1694 {
1695   Mat_MPISBAIJ *b;
1696   int          ierr;
1697   PetscTruth   flg;
1698 
1699   PetscFunctionBegin;
1700 
1701   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1702   B->data = (void*)b;
1703   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1704   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1705 
1706   B->ops->destroy    = MatDestroy_MPISBAIJ;
1707   B->ops->view       = MatView_MPISBAIJ;
1708   B->mapping    = 0;
1709   B->factor     = 0;
1710   B->assembled  = PETSC_FALSE;
1711 
1712   B->insertmode = NOT_SET_VALUES;
1713   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1714   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1715 
1716   /* build local table of row and column ownerships */
1717   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1718   b->cowners    = b->rowners + b->size + 2;
1719   b->rowners_bs = b->cowners + b->size + 2;
1720   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1721 
1722   /* build cache for off array entries formed */
1723   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1724   b->donotstash  = PETSC_FALSE;
1725   b->colmap      = PETSC_NULL;
1726   b->garray      = PETSC_NULL;
1727   b->roworiented = PETSC_TRUE;
1728 
1729 #if defined(PETSC_USE_MAT_SINGLE)
1730   /* stuff for MatSetValues_XXX in single precision */
1731   b->setvalueslen     = 0;
1732   b->setvaluescopy    = PETSC_NULL;
1733 #endif
1734 
1735   /* stuff used in block assembly */
1736   b->barray       = 0;
1737 
1738   /* stuff used for matrix vector multiply */
1739   b->lvec         = 0;
1740   b->Mvctx        = 0;
1741   b->slvec0       = 0;
1742   b->slvec0b      = 0;
1743   b->slvec1       = 0;
1744   b->slvec1a      = 0;
1745   b->slvec1b      = 0;
1746   b->sMvctx       = 0;
1747 
1748   /* stuff for MatGetRow() */
1749   b->rowindices   = 0;
1750   b->rowvalues    = 0;
1751   b->getrowactive = PETSC_FALSE;
1752 
1753   /* hash table stuff */
1754   b->ht           = 0;
1755   b->hd           = 0;
1756   b->ht_size      = 0;
1757   b->ht_flag      = PETSC_FALSE;
1758   b->ht_fact      = 0;
1759   b->ht_total_ct  = 0;
1760   b->ht_insert_ct = 0;
1761 
1762   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1763   if (flg) {
1764     PetscReal fact = 1.39;
1765     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1766     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1767     if (fact <= 1.0) fact = 1.39;
1768     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1769     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1770   }
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1772                                      "MatStoreValues_MPISBAIJ",
1773                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1775                                      "MatRetrieveValues_MPISBAIJ",
1776                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1778                                      "MatGetDiagonalBlock_MPISBAIJ",
1779                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1780   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1781                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1782                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1783   B->symmetric                  = PETSC_TRUE;
1784   B->structurally_symmetric     = PETSC_TRUE;
1785   B->symmetric_set              = PETSC_TRUE;
1786   B->structurally_symmetric_set = PETSC_TRUE;
1787   PetscFunctionReturn(0);
1788 }
1789 EXTERN_C_END
1790 
1791 /*MC
1792    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1793 
1794    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1795    and MATMPISBAIJ otherwise.
1796 
1797    Options Database Keys:
1798 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1799 
1800   Level: beginner
1801 
1802 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1803 M*/
1804 
1805 EXTERN_C_BEGIN
1806 #undef __FUNCT__
1807 #define __FUNCT__ "MatCreate_SBAIJ"
1808 int MatCreate_SBAIJ(Mat A) {
1809   int ierr,size;
1810 
1811   PetscFunctionBegin;
1812   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1813   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1814   if (size == 1) {
1815     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1816   } else {
1817     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1818   }
1819   PetscFunctionReturn(0);
1820 }
1821 EXTERN_C_END
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1825 /*@C
1826    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1827    the user should preallocate the matrix storage by setting the parameters
1828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1829    performance can be increased by more than a factor of 50.
1830 
1831    Collective on Mat
1832 
1833    Input Parameters:
1834 +  A - the matrix
1835 .  bs   - size of block
1836 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1837            submatrix  (same for all local rows)
1838 .  d_nnz - array containing the number of block nonzeros in the various block rows
1839            in the upper triangular and diagonal part of the diagonal portion of the local
1840            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1841            room for the diagonal entry even if it is zero.
1842 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1843            submatrix (same for all local rows).
1844 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1845            off-diagonal portion of the local submatrix (possibly different for
1846            each block row) or PETSC_NULL.
1847 
1848 
1849    Options Database Keys:
1850 .   -mat_no_unroll - uses code that does not unroll the loops in the
1851                      block calculations (much slower)
1852 .   -mat_block_size - size of the blocks to use
1853 
1854    Notes:
1855 
1856    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1857    then it must be used on all processors that share the object for that argument.
1858 
1859    Storage Information:
1860    For a square global matrix we define each processor's diagonal portion
1861    to be its local rows and the corresponding columns (a square submatrix);
1862    each processor's off-diagonal portion encompasses the remainder of the
1863    local matrix (a rectangular submatrix).
1864 
1865    The user can specify preallocated storage for the diagonal part of
1866    the local submatrix with either d_nz or d_nnz (not both).  Set
1867    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1868    memory allocation.  Likewise, specify preallocated storage for the
1869    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1870 
1871    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1872    the figure below we depict these three local rows and all columns (0-11).
1873 
1874 .vb
1875            0 1 2 3 4 5 6 7 8 9 10 11
1876           -------------------
1877    row 3  |  o o o d d d o o o o o o
1878    row 4  |  o o o d d d o o o o o o
1879    row 5  |  o o o d d d o o o o o o
1880           -------------------
1881 .ve
1882 
1883    Thus, any entries in the d locations are stored in the d (diagonal)
1884    submatrix, and any entries in the o locations are stored in the
1885    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1886    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1887 
1888    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1889    plus the diagonal part of the d matrix,
1890    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1891    In general, for PDE problems in which most nonzeros are near the diagonal,
1892    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1893    or you will get TERRIBLE performance; see the users' manual chapter on
1894    matrices.
1895 
1896    Level: intermediate
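   Example (editor's sketch): with bs=3 the block row depicted above has one
   diagonal block (columns 3-5) and, in its stored upper triangular part, two
   off-diagonal blocks (columns 6-8 and 9-11), so
.vb
   ierr = MatMPISBAIJSetPreallocation(A,3,1,PETSC_NULL,2,PETSC_NULL);
.ve
   would preallocate exactly enough space for it.
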
1897 
1898 .keywords: matrix, block, aij, compressed row, sparse, parallel
1899 
1900 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1901 @*/
1902 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1903 {
1904   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1905 
1906   PetscFunctionBegin;
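  /* dispatch to the type-specific routine composed onto the object in
     MatCreate_MPISBAIJ(); silently a no-op if the type provides none */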
1907   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1908   if (f) {
1909     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatCreateMPISBAIJ"
1916 /*@C
1917    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1918    (block compressed row).  For good matrix assembly performance
1919    the user should preallocate the matrix storage by setting the parameters
1920    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1921    performance can be increased by more than a factor of 50.
1922 
1923    Collective on MPI_Comm
1924 
1925    Input Parameters:
1926 +  comm - MPI communicator
1927 .  bs   - size of block
1928 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1929            This value should be the same as the local size used in creating the
1930            y vector for the matrix-vector product y = Ax.
1931 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1932            This value should be the same as the local size used in creating the
1933            x vector for the matrix-vector product y = Ax.
1934 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1935 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1936 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1937            submatrix  (same for all local rows)
1938 .  d_nnz - array containing the number of block nonzeros in the various block rows
1939            in the upper triangular portion of the diagonal portion of the local
1940            submatrix (possibly different for each block row) or PETSC_NULL.
1941            You must leave room for the diagonal entry even if it is zero.
1942 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1943            submatrix (same for all local rows).
1944 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1945            off-diagonal portion of the local submatrix (possibly different for
1946            each block row) or PETSC_NULL.
1947 
1948    Output Parameter:
1949 .  A - the matrix
1950 
1951    Options Database Keys:
1952 .   -mat_no_unroll - uses code that does not unroll the loops in the
1953                      block calculations (much slower)
1954 .   -mat_block_size - size of the blocks to use
1955 .   -mat_mpi - use the parallel matrix data structures even on one processor
1956                (defaults to using SeqBAIJ format on one processor)
1957 
1958    Notes:
1959    The user MUST specify either the local or global matrix dimensions
1960    (possibly both).
1961 
1962    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1963    then it must be used on all processors that share the object for that argument.
1964 
1965    Storage Information:
1966    For a square global matrix we define each processor's diagonal portion
1967    to be its local rows and the corresponding columns (a square submatrix);
1968    each processor's off-diagonal portion encompasses the remainder of the
1969    local matrix (a rectangular submatrix).
1970 
1971    The user can specify preallocated storage for the diagonal part of
1972    the local submatrix with either d_nz or d_nnz (not both).  Set
1973    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1974    memory allocation.  Likewise, specify preallocated storage for the
1975    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1976 
1977    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1978    the figure below we depict these three local rows and all columns (0-11).
1979 
1980 .vb
1981            0 1 2 3 4 5 6 7 8 9 10 11
1982           -------------------
1983    row 3  |  o o o d d d o o o o o o
1984    row 4  |  o o o d d d o o o o o o
1985    row 5  |  o o o d d d o o o o o o
1986           -------------------
1987 .ve
1988 
1989    Thus, any entries in the d locations are stored in the d (diagonal)
1990    submatrix, and any entries in the o locations are stored in the
1991    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1992    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1993 
1994    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1995    plus the diagonal part of the d matrix,
1996    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1997    In general, for PDE problems in which most nonzeros are near the diagonal,
1998    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1999    or you will get TERRIBLE performance; see the users' manual chapter on
2000    matrices.
2001 
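   Example (editor's sketch): a 12x12 global matrix with 3x3 blocks, using the
   preallocation worked out from the figure above (one diagonal block and two
   stored off-diagonal blocks per block row):
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(comm,3,PETSC_DECIDE,PETSC_DECIDE,12,12,1,PETSC_NULL,2,PETSC_NULL,&A);
.ve
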
2002    Level: intermediate
2003 
2004 .keywords: matrix, block, aij, compressed row, sparse, parallel
2005 
2006 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2007 @*/
2008 
2009 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2010 {
2011   int ierr,size;
2012 
2013   PetscFunctionBegin;
2014   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2015   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2016   if (size > 1) {
2017     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2018     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2019   } else {
2020     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2021     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2029 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2030 {
2031   Mat          mat;
2032   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2033   int          ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2034   PetscScalar  *array;
2035 
2036   PetscFunctionBegin;
2037   *newmat       = 0;
2038   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2039   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2040   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2041 
2042   mat->factor       = matin->factor;
2043   mat->preallocated = PETSC_TRUE;
2044   mat->assembled    = PETSC_TRUE;
2045   mat->insertmode   = NOT_SET_VALUES;
2046 
2047   a = (Mat_MPISBAIJ*)mat->data;
2048   a->bs  = oldmat->bs;
2049   a->bs2 = oldmat->bs2;
2050   a->mbs = oldmat->mbs;
2051   a->nbs = oldmat->nbs;
2052   a->Mbs = oldmat->Mbs;
2053   a->Nbs = oldmat->Nbs;
2054 
2055   a->rstart       = oldmat->rstart;
2056   a->rend         = oldmat->rend;
2057   a->cstart       = oldmat->cstart;
2058   a->cend         = oldmat->cend;
2059   a->size         = oldmat->size;
2060   a->rank         = oldmat->rank;
2061   a->donotstash   = oldmat->donotstash;
2062   a->roworiented  = oldmat->roworiented;
2063   a->rowindices   = 0;
2064   a->rowvalues    = 0;
2065   a->getrowactive = PETSC_FALSE;
2066   a->barray       = 0;
2067   a->rstart_bs    = oldmat->rstart_bs;
2068   a->rend_bs      = oldmat->rend_bs;
2069   a->cstart_bs    = oldmat->cstart_bs;
2070   a->cend_bs      = oldmat->cend_bs;
2071 
2072   /* hash table stuff */
2073   a->ht           = 0;
2074   a->hd           = 0;
2075   a->ht_size      = 0;
2076   a->ht_flag      = oldmat->ht_flag;
2077   a->ht_fact      = oldmat->ht_fact;
2078   a->ht_total_ct  = 0;
2079   a->ht_insert_ct = 0;
2080 
2081   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2082   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2083   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2084   if (oldmat->colmap) {
2085 #if defined (PETSC_USE_CTABLE)
2086     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2087 #else
2088     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2089     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2090     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2091 #endif
2092   } else a->colmap = 0;
2093 
2094   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2095     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2096     PetscLogObjectMemory(mat,len*sizeof(int));
2097     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2098   } else a->garray = 0;
2099 
2100   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2101   PetscLogObjectParent(mat,a->lvec);
2102   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2103   PetscLogObjectParent(mat,a->Mvctx);
2104 
2105   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2106   PetscLogObjectParent(mat,a->slvec0);
2107   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2108   PetscLogObjectParent(mat,a->slvec1);
2109 
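  /* slvec1a, slvec1b and slvec0b are views into the storage of slvec1 and
     slvec0, so they are rebuilt with VecCreateSeqWithArray() rather than
     duplicated */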
2110   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2111   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2113   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2114   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2115   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2116   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2117   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2120   PetscLogObjectParent(mat,a->slvec0b);
2121   PetscLogObjectParent(mat,a->slvec1a);
2122   PetscLogObjectParent(mat,a->slvec1b);
2123 
2124   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2125   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2126   a->sMvctx = oldmat->sMvctx;
2127   PetscLogObjectParent(mat,a->sMvctx);
2128 
2129   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2130   PetscLogObjectParent(mat,a->A);
2131   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2132   PetscLogObjectParent(mat,a->B);
2133   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2134   *newmat = mat;
2135   PetscFunctionReturn(0);
2136 }
2137 
2138 #include "petscsys.h"
2139 
2140 #undef __FUNCT__
2141 #define __FUNCT__ "MatLoad_MPISBAIJ"
2142 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2143 {
2144   Mat          A;
2145   int          i,nz,ierr,j,rstart,rend,fd;
2146   PetscScalar  *vals,*buf;
2147   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2148   MPI_Status   status;
2149   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2150   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2151   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2152   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2153   int          dcount,kmax,k,nzcount,tmp;
2154 
2155   PetscFunctionBegin;
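  /* binary layout assumed here: header = [cookie, M, N, total nonzeros],
     followed by the row lengths, then the column indices, then the
     numerical values */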
2156   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2157 
2158   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2159   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2160   if (!rank) {
2161     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2162     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2163     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Not a matrix object");
2164     if (header[3] < 0) {
2165       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2166     }
2167   }
2168 
2169   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2170   M = header[1]; N = header[2];
2171 
2172   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2173 
2174   /*
2175      This code adds extra rows to make sure the number of rows is
2176      divisible by the blocksize
2177   */
2178   Mbs        = M/bs;
2179   extra_rows = bs - M + bs*(Mbs);
2180   if (extra_rows == bs) extra_rows = 0;
2181   else                  Mbs++;
2182   if (extra_rows && !rank) {
2183     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2184   }
2185 
2186   /* determine ownership of all rows */
2187   mbs        = Mbs/size + ((Mbs % size) > rank);
2188   m          = mbs*bs;
2189   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2190   browners   = rowners + size + 1;
2191   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2192   rowners[0] = 0;
2193   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2194   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2195   rstart = rowners[rank];
2196   rend   = rowners[rank+1];
2197 
2198   /* distribute row lengths to all processors */
2199   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2200   if (!rank) {
2201     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2202     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2203     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2204     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2205     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2206     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2207     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2208   } else {
2209     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2210   }
2211 
2212   if (!rank) {   /* procs[0] */
2213     /* calculate the number of nonzeros on each processor */
2214     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2215     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2216     for (i=0; i<size; i++) {
2217       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2218         procsnz[i] += rowlengths[j];
2219       }
2220     }
2221     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2222 
2223     /* determine max buffer needed and allocate it */
2224     maxnz = 0;
2225     for (i=0; i<size; i++) {
2226       maxnz = PetscMax(maxnz,procsnz[i]);
2227     }
2228     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2229 
2230     /* read in my part of the matrix column indices  */
2231     nz     = procsnz[0];
2232     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2233     mycols = ibuf;
2234     if (size == 1)  nz -= extra_rows;
2235     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2236     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2237 
2238     /* read in the parts for all other processes (except the last one) and ship them off */
2239     for (i=1; i<size-1; i++) {
2240       nz   = procsnz[i];
2241       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2242       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2243     }
2244     /* read in the stuff for the last proc */
2245     if (size != 1) {
2246       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2247       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2248       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2249       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2250     }
2251     ierr = PetscFree(cols);CHKERRQ(ierr);
2252   } else {  /* procs[i], i>0 */
2253     /* determine buffer space needed for message */
2254     nz = 0;
2255     for (i=0; i<m; i++) {
2256       nz += locrowlens[i];
2257     }
2258     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2259     mycols = ibuf;
2260     /* receive message of column indices*/
2261     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2262     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2263     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2264   }
2265 
2266   /* loop over local rows, determining number of off diagonal entries */
2267   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2268   odlens   = dlens + (rend-rstart);
2269   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2270   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2271   masked1  = mask    + Mbs;
2272   masked2  = masked1 + Mbs;
2273   rowcount = 0; nzcount = 0;
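  /* mask[] flags the block columns already seen in the current block row;
     masked1[]/masked2[] collect the diagonal and off-diagonal block columns
     so the flags can be cleared again after each block row */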
2274   for (i=0; i<mbs; i++) {
2275     dcount  = 0;
2276     odcount = 0;
2277     for (j=0; j<bs; j++) {
2278       kmax = locrowlens[rowcount];
2279       for (k=0; k<kmax; k++) {
2280         tmp = mycols[nzcount++]/bs; /* block col. index */
2281         if (!mask[tmp]) {
2282           mask[tmp] = 1;
2283           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2284           else masked1[dcount++] = tmp; /* entry in diag portion */
2285         }
2286       }
2287       rowcount++;
2288     }
2289 
2290     dlens[i]  = dcount;  /* d_nzz[i] */
2291     odlens[i] = odcount; /* o_nzz[i] */
2292 
2293     /* zero out the mask elements we set */
2294     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2295     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2296   }
2297 
2298   /* create our matrix */
2299   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2300   ierr = MatSetType(A,type);CHKERRQ(ierr);
2301   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2302   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2303 
2304   if (!rank) {
2305     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2306     /* read in my part of the matrix numerical values  */
2307     nz = procsnz[0];
2308     vals = buf;
2309     mycols = ibuf;
2310     if (size == 1)  nz -= extra_rows;
2311     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2312     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2313 
2314     /* insert into matrix */
2315     jj      = rstart*bs;
2316     for (i=0; i<m; i++) {
2317       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2318       mycols += locrowlens[i];
2319       vals   += locrowlens[i];
2320       jj++;
2321     }
2322 
2323     /* read in the parts for the other processes (except the last one) and ship them out */
2324     for (i=1; i<size-1; i++) {
2325       nz   = procsnz[i];
2326       vals = buf;
2327       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2328       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2329     }
2330     /* the last proc */
2331     if (size != 1){
2332       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2333       vals = buf;
2334       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2335       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2336       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2337     }
2338     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2339 
2340   } else {
2341     /* receive numeric values */
2342     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2343 
2344     /* receive message of values*/
2345     vals   = buf;
2346     mycols = ibuf;
2347     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2348     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2349     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2350 
2351     /* insert into matrix */
2352     jj      = rstart*bs;
2353     for (i=0; i<m; i++) {
2354       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2355       mycols += locrowlens[i];
2356       vals   += locrowlens[i];
2357       jj++;
2358     }
2359   }
2360 
2361   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2362   ierr = PetscFree(buf);CHKERRQ(ierr);
2363   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2364   ierr = PetscFree(rowners);CHKERRQ(ierr);
2365   ierr = PetscFree(dlens);CHKERRQ(ierr);
2366   ierr = PetscFree(mask);CHKERRQ(ierr);
2367   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2368   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2369   *newmat = A;
2370   PetscFunctionReturn(0);
2371 }
2372 
2373 #undef __FUNCT__
2374 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2375 /*@
2376    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2377 
2378    Collective on Mat
2379 
2380    Input Parameters:
2381 +  mat  - the matrix
2382 -  fact - factor
2383 
2384    Level: advanced
2385 
2386   Notes:
2387    This can also be set by the command line option: -mat_use_hash_table fact
2388 
2389 .keywords: matrix, hashtable, factor, HT
2390 
2391 .seealso: MatSetOption()
2392 @*/
2393 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2394 {
2395   PetscFunctionBegin;
2396   SETERRQ(1,"Function not yet written for SBAIJ format");
2397   /* PetscFunctionReturn(0); */
2398 }
2399 
2400 #undef __FUNCT__
2401 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2402 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2403 {
2404   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2405   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2406   PetscReal    atmp;
2407   PetscReal    *work,*svalues,*rvalues;
2408   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2409   int          rank,size,*rowners_bs,dest,count,source;
2410   PetscScalar  *va;
2411   MatScalar    *ba;
2412   MPI_Status   stat;
2413 
2414   PetscFunctionBegin;
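  /* only the upper triangle is stored locally: take the row maxima of the
     diagonal block first, then scan the off-diagonal block B, updating the
     local rows and accumulating in work[] the maxima that belong to the
     mirrored rows owned by higher-ranked processes; these are sent out below */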
2415   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2416   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2417 
2418   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2419   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2420 
2421   bs   = a->bs;
2422   mbs  = a->mbs;
2423   Mbs  = a->Mbs;
2424   ba   = b->a;
2425   bi   = b->i;
2426   bj   = b->j;
2431 
2432   /* find ownerships */
2433   rowners_bs = a->rowners_bs;
2439 
2440   /* each proc creates an array to be distributed */
2441   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2442   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2443 
2444   /* row_max for B */
2445   if (rank != size-1){
2446     for (i=0; i<mbs; i++) {
2447       ncols = bi[1] - bi[0]; bi++;
2448       brow  = bs*i;
2449       for (j=0; j<ncols; j++){
2450         bcol = bs*(*bj);
2451         for (kcol=0; kcol<bs; kcol++){
2452           col = bcol + kcol;                 /* local col index */
2453           col += rowners_bs[rank+1];      /* global col index */
2455           for (krow=0; krow<bs; krow++){
2456             atmp = PetscAbsScalar(*ba); ba++;
2457             row = brow + krow;    /* local row index */
2459             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2460             if (work[col] < atmp) work[col] = atmp;
2461           }
2462         }
2463         bj++;
2464       }
2465     }
2471 
2472     /* send values to its owners */
2473     for (dest=rank+1; dest<size; dest++){
2474       svalues = work + rowners_bs[dest];
2475       count   = rowners_bs[dest+1]-rowners_bs[dest];
2476       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2481     }
2482   }
2483 
2484   /* receive values */
2485   if (rank){
2486     rvalues = work;
2487     count   = rowners_bs[rank+1]-rowners_bs[rank];
2488     for (source=0; source<rank; source++){
2489       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2490       /* process values */
2491       for (i=0; i<count; i++){
2492         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2493       }
2498     }
2499   }
2500 
2501   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2502   ierr = PetscFree(work);CHKERRQ(ierr);
2503   PetscFunctionReturn(0);
2504 }
2505 
2506 #undef __FUNCT__
2507 #define __FUNCT__ "MatRelax_MPISBAIJ"
2508 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2509 {
2510   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2511   int            ierr,mbs=mat->mbs,bs=mat->bs;
2512   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2513   Vec            bb1;
2514 
2515   PetscFunctionBegin;
2516   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2517   if (bs > 1)
2518     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2519 
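  /* editor's note: each sweep folds both triangles into one communication
     round: slvec0 is packed with -[xx; B^T*xx] and scatter-added into
     slvec1 = [bb; 0], after which bb1 = slvec1a + B*slvec1b carries the
     complete off-diagonal correction for the local SSOR sweep */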
2520   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2521     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2522       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2523       its--;
2524     }
2525 
2526     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2527     while (its--){
2528 
2529       /* lower triangular part: slvec0b = B^T*xx (negated below, together with the copy of xx) */
2530       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2531 
2532       /* copy xx into slvec0a */
2533       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2534       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2535       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2536       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2537 
2538       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2539 
2540       /* copy bb into slvec1a */
2541       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2542       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2543       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2544       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2545 
2546       /* set slvec1b = 0 */
2547       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2548 
2549       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2550       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2551       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2552       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2553 
2554       /* upper triangular part: bb1 = bb1 - B*x */
2555       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2556 
2557       /* local diagonal sweep */
2558       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2559     }
2560     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2561   } else {
2562     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2563   }
2564   PetscFunctionReturn(0);
2565 }
2566 
2567 #undef __FUNCT__
2568 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2569 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2570 {
2571   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2572   int            ierr;
2573   PetscScalar    mone=-1.0;
2574   Vec            lvec1,bb1;
2575 
2576   PetscFunctionBegin;
2577   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2578   if (mat->bs > 1)
2579     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2580 
2581   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2582     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2583       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2584       its--;
2585     }
2586 
2587     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2588     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2589     while (its--){
2590       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2591 
2592       /* lower diagonal part: bb1 = bb - B^T*xx */
2593       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2594       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2595 
2596       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2597       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2598       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2599 
2600       /* upper diagonal part: bb1 = bb1 - B*x */
2601       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2602       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2603 
2604       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2605 
2606       /* diagonal sweep */
2607       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2608     }
2609     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2610     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2611   } else {
2612     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2613   }
2614   PetscFunctionReturn(0);
2615 }
2616 
2617