xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 2f2cbf6d06d0509fc30bbb0fccbdb1461a4f72a9)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 extern int MatSetUpMultiply_MPISBAIJ(Mat);
8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
11 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
21 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise the ..._MatScalar() routines take matrix elements already in single
26    (MatScalar) precision and insert them into the single-precision data structure. The function
27    MatSetValuesBlocked_MPISBAIJ() converts the (PetscScalar) entries into single precision and
28    then calls ..._MatScalar() to put them into the single-precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
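
/*
   Illustrative sketch (not part of this file's API): when PETSC_USE_MAT_SINGLE
   is defined, user-supplied PetscScalar (double precision) entries are demoted
   element-by-element into a MatScalar (single precision) scratch array before
   insertion, for example

       MatScalar *vsingle;
       ierr = PetscMalloc(N*sizeof(MatScalar),&vsingle);CHKERRQ(ierr);
       for (i=0; i<N; i++) vsingle[i] = v[i];       <-- implicit double-to-float conversion
       ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);

   which is exactly the pattern used by MatSetValues_MPISBAIJ() below.
*/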
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 int MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   int          ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 int MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   int          ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
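/*
   Usage sketch (illustrative only; assumes `mat` is an assembled MATMPISBAIJ):
   MatStoreValues()/MatRetrieveValues() let an application save the numerical
   values of the matrix and restore them later, e.g. around an operation that
   overwrites them, provided no new nonzero locations are created:

       ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
       ierr = MatStoreValues(mat);CHKERRQ(ierr);       <-- save current values
         ... code that overwrites the values of mat ...
       ierr = MatRetrieveValues(mat);CHKERRQ(ierr);    <-- restore saved values
*/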
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (int*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
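
/*
   Note on the macro above: the target block column bcol is located within block
   row brow by bisection (halving [low,high) while it spans more than 3 entries)
   followed by a short linear scan. An existing entry is added to or overwritten
   in place; otherwise, if the row is full, the i/j/a arrays are reallocated as
   one contiguous chunk with CHUNKSIZE extra block slots, and the trailing
   entries of the row are shifted up to make room for the new block.
*/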
153 #ifndef MatSetValues_SeqSBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (int*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
237   int          ierr,i,N = m*n;
238   MatScalar    *vsingle;
239 
240   PetscFunctionBegin;
241   if (N > b->setvalueslen) {
242     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
243     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
244     b->setvalueslen  = N;
245   }
246   vsingle = b->setvaluescopy;
247 
248   for (i=0; i<N; i++) {
249     vsingle[i] = v[i];
250   }
251   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
258 {
259   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
260   int         ierr,i,N = m*n*b->bs2;
261   MatScalar   *vsingle;
262 
263   PetscFunctionBegin;
264   if (N > b->setvalueslen) {
265     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
266     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
267     b->setvalueslen  = N;
268   }
269   vsingle = b->setvaluescopy;
270   for (i=0; i<N; i++) {
271     vsingle[i] = v[i];
272   }
273   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
279 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
282   int         ierr,i,N = m*n;
283   MatScalar   *vsingle;
284 
285   PetscFunctionBegin;
286   SETERRQ(1,"Function not yet written for SBAIJ format");
287   /* PetscFunctionReturn(0); */
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
292 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
293 {
294   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
295   int         ierr,i,N = m*n*b->bs2;
296   MatScalar   *vsingle;
297 
298   PetscFunctionBegin;
299   SETERRQ(1,"Function not yet written for SBAIJ format");
300   /* PetscFunctionReturn(0); */
301 }
302 #endif
303 
304 /* Only add/insert a(i,j) with i<=j (blocks).
305    Any a(i,j) with i>j input by the user is ignored
306    (see the usage sketch after this function). */
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
309 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
310 {
311   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
312   MatScalar    value;
313   PetscTruth   roworiented = baij->roworiented;
314   int          ierr,i,j,row,col;
315   int          rstart_orig=baij->rstart_bs;
316   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
317   int          cend_orig=baij->cend_bs,bs=baij->bs;
318 
319   /* Some Variables required in the macro */
320   Mat          A = baij->A;
321   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
322   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
323   MatScalar    *aa=a->a;
324 
325   Mat          B = baij->B;
326   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
327   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
328   MatScalar    *ba=b->a;
329 
330   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
331   int          low,high,t,ridx,cidx,bs2=a->bs2;
332   MatScalar    *ap,*bap;
333 
334   /* for stash */
335   int          n_loc, *in_loc=0;
336   MatScalar    *v_loc=0;
337 
338   PetscFunctionBegin;
339 
340   if(!baij->donotstash){
341     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
342     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
343   }
344 
345   for (i=0; i<m; i++) {
346     if (im[i] < 0) continue;
347 #if defined(PETSC_USE_BOPT_g)
348     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
349 #endif
350     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
351       row = im[i] - rstart_orig;              /* local row index */
352       for (j=0; j<n; j++) {
353         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
354         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
355           col = in[j] - cstart_orig;          /* local col index */
356           brow = row/bs; bcol = col/bs;
357           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
358           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
359           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
360           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
361         } else if (in[j] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
364 #endif
365         else {  /* off-diag entry (B) */
366           if (mat->was_assembled) {
367             if (!baij->colmap) {
368               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
369             }
370 #if defined (PETSC_USE_CTABLE)
371             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
372             col  = col - 1;
373 #else
374             col = baij->colmap[in[j]/bs] - 1;
375 #endif
376             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
377               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
378               col =  in[j];
379               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
380               B = baij->B;
381               b = (Mat_SeqBAIJ*)(B)->data;
382               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
383               ba=b->a;
384             } else col += in[j]%bs;
385           } else col = in[j];
386           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
387           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
388           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
389         }
390       }
391     } else {  /* off processor entry */
392       if (!baij->donotstash) {
393         n_loc = 0;
394         for (j=0; j<n; j++){
395           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
396           in_loc[n_loc] = in[j];
397           if (roworiented) {
398             v_loc[n_loc] = v[i*n+j];
399           } else {
400             v_loc[n_loc] = v[j*m+i];
401           }
402           n_loc++;
403         }
404         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
405       }
406     }
407   }
408 
409   if(!baij->donotstash){
410     ierr = PetscFree(in_loc);CHKERRQ(ierr);
411     ierr = PetscFree(v_loc);CHKERRQ(ierr);
412   }
413   PetscFunctionReturn(0);
414 }
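
/*
   Usage sketch (illustrative only; assumes `mat` is a MATMPISBAIJ with block
   size 2): only entries whose block row is <= the block column are stored, so
   the second insertion below falls in a lower-triangular block and is dropped
   without error, as described in the comment before this function.

       int         row,col;
       PetscScalar one = 1.0;
       row = 0; col = 2;      upper-triangular block: kept
       ierr = MatSetValues(mat,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
       row = 2; col = 0;      lower-triangular block: silently ignored
       ierr = MatSetValues(mat,1,&row,1,&col,&one,INSERT_VALUES);CHKERRQ(ierr);
*/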
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
418 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
419 {
420   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
421   const MatScalar *value;
422   MatScalar       *barray=baij->barray;
423   PetscTruth      roworiented = baij->roworiented;
424   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
425   int             rend=baij->rend,cstart=baij->cstart,stepval;
426   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
427 
428   PetscFunctionBegin;
429   if(!barray) {
430     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
431     baij->barray = barray;
432   }
433 
434   if (roworiented) {
435     stepval = (n-1)*bs;
436   } else {
437     stepval = (m-1)*bs;
438   }
439   for (i=0; i<m; i++) {
440     if (im[i] < 0) continue;
441 #if defined(PETSC_USE_BOPT_g)
442     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
443 #endif
444     if (im[i] >= rstart && im[i] < rend) {
445       row = im[i] - rstart;
446       for (j=0; j<n; j++) {
447         /* If there is only one block column (row-oriented) or one block row (column-oriented), a copy is not required */
448         if ((roworiented) && (n == 1)) {
449           barray = (MatScalar*) v + i*bs2;
450         } else if((!roworiented) && (m == 1)) {
451           barray = (MatScalar*) v + j*bs2;
452         } else { /* Here a copy is required */
453           if (roworiented) {
454             value = v + i*(stepval+bs)*bs + j*bs;
455           } else {
456             value = v + j*(stepval+bs)*bs + i*bs;
457           }
458           for (ii=0; ii<bs; ii++,value+=stepval) {
459             for (jj=0; jj<bs; jj++) {
460               *barray++  = *value++;
461             }
462           }
463           barray -=bs2;
464         }
465 
466         if (in[j] >= cstart && in[j] < cend){
467           col  = in[j] - cstart;
468           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
469         }
470         else if (in[j] < 0) continue;
471 #if defined(PETSC_USE_BOPT_g)
472         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
473 #endif
474         else {
475           if (mat->was_assembled) {
476             if (!baij->colmap) {
477               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
478             }
479 
480 #if defined(PETSC_USE_BOPT_g)
481 #if defined (PETSC_USE_CTABLE)
482             { int data;
483               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
484               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
485             }
486 #else
487             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
488 #endif
489 #endif
490 #if defined (PETSC_USE_CTABLE)
491             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
492             col  = (col - 1)/bs;
493 #else
494             col = (baij->colmap[in[j]] - 1)/bs;
495 #endif
496             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
497               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
498               col =  in[j];
499             }
500           }
501           else col = in[j];
502           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
503         }
504       }
505     } else {
506       if (!baij->donotstash) {
507         if (roworiented) {
508           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
509         } else {
510           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
511         }
512       }
513     }
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 #define HASH_KEY 0.6180339887
519 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
520 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
521 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
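
/*
   HASH() implements multiplicative hashing: the key is multiplied by the
   golden-ratio conjugate 0.6180339887..., the fractional part of the product
   is kept, and the result is scaled onto the table slots 0..size-1. This
   spreads consecutive keys nearly uniformly; e.g. with size=100 and key=7,
   7*0.6180339887 = 4.3262..., so HASH(100,7,tmp) = (int)(100*0.3262...) = 32.
*/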
522 #undef __FUNCT__
523 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
524 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
525 {
526   PetscFunctionBegin;
527   SETERRQ(1,"Function not yet written for SBAIJ format");
528   /* PetscFunctionReturn(0); */
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
533 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
534 {
535   PetscFunctionBegin;
536   SETERRQ(1,"Function not yet written for SBAIJ format");
537   /* PetscFunctionReturn(0); */
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatGetValues_MPISBAIJ"
542 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
543 {
544   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
545   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
546   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
547 
548   PetscFunctionBegin;
549   for (i=0; i<m; i++) {
550     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
551     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
552     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
553       row = idxm[i] - bsrstart;
554       for (j=0; j<n; j++) {
555         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
556         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
557         if (idxn[j] >= bscstart && idxn[j] < bscend){
558           col = idxn[j] - bscstart;
559           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
560         } else {
561           if (!baij->colmap) {
562             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
563           }
564 #if defined (PETSC_USE_CTABLE)
565           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
566           data--;
567 #else
568           data = baij->colmap[idxn[j]/bs]-1;
569 #endif
570           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
571           else {
572             col  = data + idxn[j]%bs;
573             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
574           }
575         }
576       }
577     } else {
578       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
579     }
580   }
581   PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatNorm_MPISBAIJ"
586 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
587 {
588   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
589   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
590   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
591   int        ierr;
592   PetscReal  sum[2],*lnorm2;
593 
594   PetscFunctionBegin;
595   if (baij->size == 1) {
596     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
597   } else {
598     if (type == NORM_FROBENIUS) {
599       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
600       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
601       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
602       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
603       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
604       /*
605       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
606       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
607       */
608       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
609       /*
610       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
611       PetscSynchronizedFlush(mat->comm); */
612 
613       *norm = sqrt(sum[0] + 2*sum[1]); /* B is counted twice: once as the superdiagonal block and once as its (unstored) transpose */
614       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
615     } else {
616       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
617     }
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 /*
623   Creates the hash table, and sets its entries.
624   The table is created only once.
625   If new entries need to be added to the matrix
626   then the hash table has to be destroyed and
627   recreated.
628 */
629 #undef __FUNCT__
630 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
631 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
632 {
633   PetscFunctionBegin;
634   SETERRQ(1,"Function not yet written for SBAIJ format");
635   /* PetscFunctionReturn(0); */
636 }
637 
638 #undef __FUNCT__
639 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
640 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
641 {
642   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
643   int         ierr,nstash,reallocs;
644   InsertMode  addv;
645 
646   PetscFunctionBegin;
647   if (baij->donotstash) {
648     PetscFunctionReturn(0);
649   }
650 
651   /* make sure all processors are either in INSERTMODE or ADDMODE */
652   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
653   if (addv == (ADD_VALUES|INSERT_VALUES)) {
654     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
655   }
656   mat->insertmode = addv; /* in case this processor had no cache */
657 
658   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
659   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
660   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
661   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
662   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
663   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
669 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
670 {
671   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
672   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
673   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
674   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
675   int         *row,*col,other_disassembled;
676   PetscTruth  r1,r2,r3;
677   MatScalar   *val;
678   InsertMode  addv = mat->insertmode;
679 
680   PetscFunctionBegin;
681 
682   if (!baij->donotstash) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       /*
686       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
687       PetscSynchronizedFlush(mat->comm);
688       */
689       if (!flg) break;
690 
691       for (i=0; i<n;) {
692         /* Now identify the consecutive vals belonging to the same row */
693         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
694         if (j < n) ncols = j-i;
695         else       ncols = n-i;
696         /* Now assemble all these values with a single function call */
697         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702     /* Now process the block-stash. Since the values are stashed column-oriented,
703        set the roworiented flag to column oriented, and after MatSetValues()
704        restore the original flags */
705     r1 = baij->roworiented;
706     r2 = a->roworiented;
707     r3 = b->roworiented;
708     baij->roworiented = PETSC_FALSE;
709     a->roworiented    = PETSC_FALSE;
710     b->roworiented    = PETSC_FALSE;
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
718         if (j < n) ncols = j-i;
719         else       ncols = n-i;
720         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
721         i = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
725     baij->roworiented = r1;
726     a->roworiented    = r2;
727     b->roworiented    = r3;
728   }
729 
730   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
731   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
732 
733   /* determine if any processor has disassembled, if so we must
734      also disassemble ourselves, in order that we may reassemble. */
735   /*
736      if nonzero structure of submatrix B cannot change then we know that
737      no processor disassembled thus we can skip this stuff
738   */
739   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
740     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
741     if (mat->was_assembled && !other_disassembled) {
742       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
743     }
744   }
745 
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
748   }
749   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
750   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
751 
752 #if defined(PETSC_USE_BOPT_g)
753   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
754     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
755     baij->ht_total_ct  = 0;
756     baij->ht_insert_ct = 0;
757   }
758 #endif
759   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
760     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
761     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
762     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
763   }
764 
765   if (baij->rowvalues) {
766     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
767     baij->rowvalues = 0;
768   }
769 
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
775 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
776 {
777   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
778   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
779   PetscTruth        isascii,isdraw;
780   PetscViewer       sviewer;
781   PetscViewerFormat format;
782 
783   PetscFunctionBegin;
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
786   if (isascii) {
787     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
788     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
789       MatInfo info;
790       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
791       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
793               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
794               baij->bs,(int)info.memory);CHKERRQ(ierr);
795       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
796       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
797       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
798       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
799       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
800       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
801       PetscFunctionReturn(0);
802     } else if (format == PETSC_VIEWER_ASCII_INFO) {
803       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw       draw;
810     PetscTruth isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
813   }
814 
815   if (size == 1) {
816     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
817     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
818   } else {
819     /* assemble the entire matrix onto first processor. */
820     Mat         A;
821     Mat_SeqSBAIJ *Aloc;
822     Mat_SeqBAIJ *Bloc;
823     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
824     MatScalar   *a;
825 
826     if (!rank) {
827       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
828     } else {
829       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
830     }
831     PetscLogObjectParent(mat,A);
832 
833     /* copy over the A part */
834     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
835     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
836     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
837 
838     for (i=0; i<mbs; i++) {
839       rvals[0] = bs*(baij->rstart + i);
840       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
841       for (j=ai[i]; j<ai[i+1]; j++) {
842         col = (baij->cstart+aj[j])*bs;
843         for (k=0; k<bs; k++) {
844           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
845           col++; a += bs;
846         }
847       }
848     }
849     /* copy over the B part */
850     Bloc = (Mat_SeqBAIJ*)baij->B->data;
851     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
852     for (i=0; i<mbs; i++) {
853       rvals[0] = bs*(baij->rstart + i);
854       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
855       for (j=ai[i]; j<ai[i+1]; j++) {
856         col = baij->garray[aj[j]]*bs;
857         for (k=0; k<bs; k++) {
858           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
859           col++; a += bs;
860         }
861       }
862     }
863     ierr = PetscFree(rvals);CHKERRQ(ierr);
864     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
865     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866     /*
867        Everyone has to call to draw the matrix since the graphics waits are
868        synchronized across all processors that share the PetscDraw object
869     */
870     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
871     if (!rank) {
872       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
873       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
874     }
875     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
876     ierr = MatDestroy(A);CHKERRQ(ierr);
877   }
878   PetscFunctionReturn(0);
879 }
880 
881 #undef __FUNCT__
882 #define __FUNCT__ "MatView_MPISBAIJ"
883 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
884 {
885   int        ierr;
886   PetscTruth isascii,isdraw,issocket,isbinary;
887 
888   PetscFunctionBegin;
889   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
893   if (isascii || isdraw || issocket || isbinary) {
894     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
895   } else {
896     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
897   }
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatDestroy_MPISBAIJ"
903 int MatDestroy_MPISBAIJ(Mat mat)
904 {
905   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
906   int         ierr;
907 
908   PetscFunctionBegin;
909 #if defined(PETSC_USE_LOG)
910   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
911 #endif
912   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
913   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
914   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
915   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
916   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
917 #if defined (PETSC_USE_CTABLE)
918   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
919 #else
920   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
921 #endif
922   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
923   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
924   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
925   if (baij->slvec0) {
926     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
927     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
928   }
929   if (baij->slvec1) {
930     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
931     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
932     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
933   }
934   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
935   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
936   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
937   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
938 #if defined(PETSC_USE_MAT_SINGLE)
939   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
940 #endif
941   ierr = PetscFree(baij);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944 
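/*
   Parallel y = A*x with symmetric storage: each process holds only the
   diagonal block A and the superdiagonal block B of its block row, so the
   unstored subdiagonal contribution must be produced with B^T, i.e.

       y_loc = A_loc*x_loc + B*x_ghost + (B^T*x_loc scattered back to owners)

   The slvec0/slvec1 work vectors and the sMvctx scatter below combine the
   ghost-value gather and the B^T contributions into one communication phase.
*/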
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMult_MPISBAIJ"
947 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
948 {
949   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
950   int         ierr,nt,mbs=a->mbs,bs=a->bs;
951   PetscScalar *x,*from,zero=0.0;
952 
953   PetscFunctionBegin;
954   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
955   if (nt != A->n) {
956     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
957   }
958   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
959   if (nt != A->m) {
960     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
961   }
962 
963   /* diagonal part */
964   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
965   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
966 
967   /* subdiagonal part */
968   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
969 
970   /* copy x into the vec slvec0 */
971   ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr);
972   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
973   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
974   ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr);
975 
976   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
977   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
978   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
979 
980   /* superdiagonal part */
981   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
982 
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
988 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
989 {
990   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
991   int         ierr,nt;
992 
993   PetscFunctionBegin;
994   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
995   if (nt != A->n) {
996     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
997   }
998   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
999   if (nt != A->m) {
1000     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1001   }
1002 
1003   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1004   /* do diagonal part */
1005   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1006   /* do superdiagonal part */
1007   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1008   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1009   /* do subdiagonal part */
1010   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1011   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1012   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1013 
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1019 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1020 {
1021   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1022   int          ierr,mbs=a->mbs,bs=a->bs;
1023   PetscScalar  *x,*from,zero=0.0;
1024 
1025   PetscFunctionBegin;
1026   /*
1027   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1028   PetscSynchronizedFlush(A->comm);
1029   */
1030   /* diagonal part */
1031   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1032   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1033 
1034   /* subdiagonal part */
1035   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1036 
1037   /* copy x into the vec slvec0 */
1038   ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr);
1039   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1040   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1041   ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr);
1042 
1043   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1044   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1045   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1046 
1047   /* superdiagonal part */
1048   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1049 
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1055 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1058   int        ierr;
1059 
1060   PetscFunctionBegin;
1061   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1062   /* do diagonal part */
1063   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1064   /* do superdiagonal part */
1065   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1066   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1067 
1068   /* do subdiagonal part */
1069   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1070   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1071   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1072 
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1078 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1079 {
1080   int ierr;
1081 
1082   PetscFunctionBegin;
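  /* the matrix is stored symmetrically, so A^T x = A x */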
1083   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1089 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1090 {
1091   int ierr;
1092 
1093   PetscFunctionBegin;
1094   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 /*
1099   This only works correctly for square matrices where the subblock A->A is the
1100    diagonal block
1101 */
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1104 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1105 {
1106   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1107   int         ierr;
1108 
1109   PetscFunctionBegin;
1110   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1111   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatScale_MPISBAIJ"
1117 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1118 {
1119   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1120   int         ierr;
1121 
1122   PetscFunctionBegin;
1123   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1124   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1130 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1131 {
1132   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1133   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1134   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1135   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1136   int            *cmap,*idx_p,cstart = mat->cstart;
1137 
1138   PetscFunctionBegin;
1139   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1140   mat->getrowactive = PETSC_TRUE;
1141 
1142   if (!mat->rowvalues && (idx || v)) {
1143     /*
1144         allocate enough space to hold information from the longest row.
1145     */
1146     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1147     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1148     int     max = 1,mbs = mat->mbs,tmp;
1149     for (i=0; i<mbs; i++) {
1150       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1151       if (max < tmp) { max = tmp; }
1152     }
1153     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1154     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1155   }
1156 
1157   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1158   lrow = row - brstart;  /* local row index */
1159 
1160   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1161   if (!v)   {pvA = 0; pvB = 0;}
1162   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1163   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1164   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1165   nztot = nzA + nzB;
1166 
1167   cmap  = mat->garray;
1168   if (v  || idx) {
1169     if (nztot) {
1170       /* Sort by increasing column numbers, assuming A and B already sorted */
1171       int imark = -1;
1172       if (v) {
1173         *v = v_p = mat->rowvalues;
1174         for (i=0; i<nzB; i++) {
1175           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1176           else break;
1177         }
1178         imark = i;
1179         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1180         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1181       }
1182       if (idx) {
1183         *idx = idx_p = mat->rowindices;
1184         if (imark > -1) {
1185           for (i=0; i<imark; i++) {
1186             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1187           }
1188         } else {
1189           for (i=0; i<nzB; i++) {
1190             if (cmap[cworkB[i]/bs] < cstart)
1191               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1192             else break;
1193           }
1194           imark = i;
1195         }
1196         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1197         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1198       }
1199     } else {
1200       if (idx) *idx = 0;
1201       if (v)   *v   = 0;
1202     }
1203   }
1204   *nz = nztot;
1205   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1206   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1212 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1213 {
1214   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1215 
1216   PetscFunctionBegin;
1217   if (baij->getrowactive == PETSC_FALSE) {
1218     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1219   }
1220   baij->getrowactive = PETSC_FALSE;
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1226 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1227 {
1228   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1229 
1230   PetscFunctionBegin;
1231   *bs = baij->bs;
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1237 int MatZeroEntries_MPISBAIJ(Mat A)
1238 {
1239   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1240   int         ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1244   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1250 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1251 {
1252   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1253   Mat         A = a->A,B = a->B;
1254   int         ierr;
1255   PetscReal   isend[5],irecv[5];
1256 
1257   PetscFunctionBegin;
1258   info->block_size     = (PetscReal)a->bs;
1259   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1260   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1261   isend[3] = info->memory;  isend[4] = info->mallocs;
1262   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1263   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1264   isend[3] += info->memory;  isend[4] += info->mallocs;
1265   if (flag == MAT_LOCAL) {
1266     info->nz_used      = isend[0];
1267     info->nz_allocated = isend[1];
1268     info->nz_unneeded  = isend[2];
1269     info->memory       = isend[3];
1270     info->mallocs      = isend[4];
1271   } else if (flag == MAT_GLOBAL_MAX) {
1272     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1273     info->nz_used      = irecv[0];
1274     info->nz_allocated = irecv[1];
1275     info->nz_unneeded  = irecv[2];
1276     info->memory       = irecv[3];
1277     info->mallocs      = irecv[4];
1278   } else if (flag == MAT_GLOBAL_SUM) {
1279     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1280     info->nz_used      = irecv[0];
1281     info->nz_allocated = irecv[1];
1282     info->nz_unneeded  = irecv[2];
1283     info->memory       = irecv[3];
1284     info->mallocs      = irecv[4];
1285   } else {
1286     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1287   }
1288   info->rows_global       = (PetscReal)matin->M;
1289   info->columns_global    = (PetscReal)matin->N;
1290   info->rows_local        = (PetscReal)matin->m;
1291   info->columns_local     = (PetscReal)matin->n;
1292   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1293   info->fill_ratio_needed = 0;
1294   info->factor_mallocs    = 0;
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1300 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1301 {
1302   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1303   int         ierr;
1304 
1305   PetscFunctionBegin;
1306   switch (op) {
1307   case MAT_NO_NEW_NONZERO_LOCATIONS:
1308   case MAT_YES_NEW_NONZERO_LOCATIONS:
1309   case MAT_COLUMNS_UNSORTED:
1310   case MAT_COLUMNS_SORTED:
1311   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1312   case MAT_KEEP_ZEROED_ROWS:
1313   case MAT_NEW_NONZERO_LOCATION_ERR:
1314     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1315     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1316     break;
1317   case MAT_ROW_ORIENTED:
1318     a->roworiented = PETSC_TRUE;
1319     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1320     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1321     break;
1322   case MAT_ROWS_SORTED:
1323   case MAT_ROWS_UNSORTED:
1324   case MAT_YES_NEW_DIAGONALS:
1325     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1326     break;
1327   case MAT_COLUMN_ORIENTED:
1328     a->roworiented = PETSC_FALSE;
1329     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1330     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1331     break;
1332   case MAT_IGNORE_OFF_PROC_ENTRIES:
1333     a->donotstash = PETSC_TRUE;
1334     break;
1335   case MAT_NO_NEW_DIAGONALS:
1336     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1337   case MAT_USE_HASH_TABLE:
1338     a->ht_flag = PETSC_TRUE;
1339     break;
1340   case MAT_NOT_SYMMETRIC:
1341   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1342   case MAT_HERMITIAN:
1343     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1344   case MAT_SYMMETRIC:
1345   case MAT_STRUCTURALLY_SYMMETRIC:
1346   case MAT_NOT_HERMITIAN:
1347   case MAT_SYMMETRY_ETERNAL:
1348   case MAT_NOT_SYMMETRY_ETERNAL:
1349     break;
1350   default:
1351     SETERRQ(PETSC_ERR_SUP,"unknown option");
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1358 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1359 {
1360   int ierr;
1361   PetscFunctionBegin;
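  /* symmetric storage: the transpose equals the matrix itself, so a copy suffices */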
1362   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1368 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1369 {
1370   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1371   Mat         a = baij->A,b = baij->B;
1372   int         ierr,s1,s2,s3;
1373 
1374   PetscFunctionBegin;
1375   if (ll != rr) {
1376     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1377   }
1378   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1379   if (rr) {
1380     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1381     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1382     /* Overlap communication with computation. */
1383     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1384     /* ll == rr here, so left scale the off-diagonal block while the scatter proceeds */
1385     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1386     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1387     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1388 
1389     /* scale the diagonal block */
1390     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1391 
1392 
1393     /* finish the scatter, then right scale the off-diagonal block */
1394     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1395     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1396   }
1397 
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1403 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1404 {
1405   PetscFunctionBegin;
1406   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1411 int MatPrintHelp_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1414   MPI_Comm    comm = A->comm;
1415   static int  called = 0;
1416   int         ierr;
1417 
1418   PetscFunctionBegin;
1419   if (!a->rank) {
1420     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1421   }
1422   if (called) {PetscFunctionReturn(0);} else called = 1;
1423   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1424   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1430 int MatSetUnfactored_MPISBAIJ(Mat A)
1431 {
1432   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1433   int         ierr;
1434 
1435   PetscFunctionBegin;
1436   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatEqual_MPISBAIJ"
1444 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1445 {
1446   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1447   Mat         a,b,c,d;
1448   PetscTruth  flg;
1449   int         ierr;
1450 
1451   PetscFunctionBegin;
1452   a = matA->A; b = matA->B;
1453   c = matB->A; d = matB->B;
1454 
1455   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1456   if (flg == PETSC_TRUE) {
1457     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1458   }
1459   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1465 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1466 {
1467   int        ierr;
1468 
1469   PetscFunctionBegin;
1470   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1476 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1477 {
1478   int        i,ierr;
1479   PetscTruth flg;
1480 
  PetscFunctionBegin;
1481   for (i=0; i<n; i++) {
1482     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1483     if (!flg) {
1484       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1485     }
1486   }
1487   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 
1492 /* -------------------------------------------------------------------*/
1493 static struct _MatOps MatOps_Values = {
1494        MatSetValues_MPISBAIJ,
1495        MatGetRow_MPISBAIJ,
1496        MatRestoreRow_MPISBAIJ,
1497        MatMult_MPISBAIJ,
1498 /* 4*/ MatMultAdd_MPISBAIJ,
1499        MatMultTranspose_MPISBAIJ,
1500        MatMultTransposeAdd_MPISBAIJ,
1501        0,
1502        0,
1503        0,
1504 /*10*/ 0,
1505        0,
1506        0,
1507        MatRelax_MPISBAIJ,
1508        MatTranspose_MPISBAIJ,
1509 /*15*/ MatGetInfo_MPISBAIJ,
1510        MatEqual_MPISBAIJ,
1511        MatGetDiagonal_MPISBAIJ,
1512        MatDiagonalScale_MPISBAIJ,
1513        MatNorm_MPISBAIJ,
1514 /*20*/ MatAssemblyBegin_MPISBAIJ,
1515        MatAssemblyEnd_MPISBAIJ,
1516        0,
1517        MatSetOption_MPISBAIJ,
1518        MatZeroEntries_MPISBAIJ,
1519 /*25*/ MatZeroRows_MPISBAIJ,
1520        0,
1521        0,
1522        0,
1523        0,
1524 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1525        0,
1526        0,
1527        0,
1528        0,
1529 /*35*/ MatDuplicate_MPISBAIJ,
1530        0,
1531        0,
1532        0,
1533        0,
1534 /*40*/ 0,
1535        MatGetSubMatrices_MPISBAIJ,
1536        0,
1537        MatGetValues_MPISBAIJ,
1538        0,
1539 /*45*/ MatPrintHelp_MPISBAIJ,
1540        MatScale_MPISBAIJ,
1541        0,
1542        0,
1543        0,
1544 /*50*/ MatGetBlockSize_MPISBAIJ,
1545        0,
1546        0,
1547        0,
1548        0,
1549 /*55*/ 0,
1550        0,
1551        MatSetUnfactored_MPISBAIJ,
1552        0,
1553        MatSetValuesBlocked_MPISBAIJ,
1554 /*60*/ 0,
1555        0,
1556        0,
1557        MatGetPetscMaps_Petsc,
1558        0,
1559 /*65*/ 0,
1560        0,
1561        0,
1562        0,
1563        0,
1564 /*70*/ MatGetRowMax_MPISBAIJ,
1565        0,
1566        0,
1567        0,
1568        0,
1569 /*75*/ 0,
1570        0,
1571        0,
1572        0,
1573        0,
1574 /*80*/ 0,
1575        0,
1576        0,
1577        0,
1578 /*85*/ MatLoad_MPISBAIJ
1579 };
1580 
1581 
1582 EXTERN_C_BEGIN
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1585 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1586 {
1587   PetscFunctionBegin;
1588   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1589   *iscopy = PETSC_FALSE;
1590   PetscFunctionReturn(0);
1591 }
1592 EXTERN_C_END
1593 
1594 EXTERN_C_BEGIN
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1597 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1598 {
1599   Mat_MPISBAIJ *b;
1600   int          ierr,i,mbs,Mbs;
1601 
1602   PetscFunctionBegin;
1603   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1604 
1605   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1606   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1607   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1608   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1609   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1610   if (d_nnz) {
1611     for (i=0; i<B->m/bs; i++) {
1612       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1613     }
1614   }
1615   if (o_nnz) {
1616     for (i=0; i<B->m/bs; i++) {
1617       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1618     }
1619   }
1620   B->preallocated = PETSC_TRUE;
1621   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1622   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1623   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1624   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1625 
1626   b   = (Mat_MPISBAIJ*)B->data;
1627   mbs = B->m/bs;
1628   Mbs = B->M/bs;
1629   if (mbs*bs != B->m) {
1630     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1631   }
1632 
1633   b->bs  = bs;
1634   b->bs2 = bs*bs;
1635   b->mbs = mbs;
1636   b->nbs = mbs;
1637   b->Mbs = Mbs;
1638   b->Nbs = Mbs;
1639 
1640   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1641   b->rowners[0]    = 0;
1642   for (i=2; i<=b->size; i++) {
1643     b->rowners[i] += b->rowners[i-1];
1644   }
1645   b->rstart    = b->rowners[b->rank];
1646   b->rend      = b->rowners[b->rank+1];
1647   b->cstart    = b->rstart;
1648   b->cend      = b->rend;
1649   for (i=0; i<=b->size; i++) {
1650     b->rowners_bs[i] = b->rowners[i]*bs;
1651   }
1652   b->rstart_bs = b->rstart*bs;
1653   b->rend_bs   = b->rend*bs;
1654 
1655   b->cstart_bs = b->cstart*bs;
1656   b->cend_bs   = b->cend*bs;
1657 
1658 
1659   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1660   PetscLogObjectParent(B,b->A);
1661   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1662   PetscLogObjectParent(B,b->B);
1663 
1664   /* build cache for off array entries formed */
1665   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1666 
1667   PetscFunctionReturn(0);
1668 }
1669 EXTERN_C_END
1670 
1671 /*MC
1672    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1673    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1674 
1675    Options Database Keys:
1676 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1677 
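   Notes:
   A minimal assembly sketch (declarations omitted; sizes, indices and
   preallocation values are illustrative only).  Set only entries in the
   upper triangle (column >= row):

.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
   ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
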
1678   Level: beginner
1679 
1680 .seealso: MatCreateMPISBAIJ()
1681 M*/
1682 
1683 EXTERN_C_BEGIN
1684 #undef __FUNCT__
1685 #define __FUNCT__ "MatCreate_MPISBAIJ"
1686 int MatCreate_MPISBAIJ(Mat B)
1687 {
1688   Mat_MPISBAIJ *b;
1689   int          ierr;
1690   PetscTruth   flg;
1691 
1692   PetscFunctionBegin;
1693 
1694   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1695   B->data = (void*)b;
1696   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1697   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1698 
1699   B->ops->destroy    = MatDestroy_MPISBAIJ;
1700   B->ops->view       = MatView_MPISBAIJ;
1701   B->mapping    = 0;
1702   B->factor     = 0;
1703   B->assembled  = PETSC_FALSE;
1704 
1705   B->insertmode = NOT_SET_VALUES;
1706   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1707   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1708 
1709   /* build local table of row and column ownerships */
1710   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1711   b->cowners    = b->rowners + b->size + 2;
1712   b->rowners_bs = b->cowners + b->size + 2;
1713   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1714 
1715   /* build cache for off array entries formed */
1716   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1717   b->donotstash  = PETSC_FALSE;
1718   b->colmap      = PETSC_NULL;
1719   b->garray      = PETSC_NULL;
1720   b->roworiented = PETSC_TRUE;
1721 
1722 #if defined(PETSC_USE_MAT_SINGLE)
1723   /* stuff for MatSetValues_XXX in single precision */
1724   b->setvalueslen     = 0;
1725   b->setvaluescopy    = PETSC_NULL;
1726 #endif
1727 
1728   /* stuff used in block assembly */
1729   b->barray       = 0;
1730 
1731   /* stuff used for matrix vector multiply */
1732   b->lvec         = 0;
1733   b->Mvctx        = 0;
1734   b->slvec0       = 0;
1735   b->slvec0b      = 0;
1736   b->slvec1       = 0;
1737   b->slvec1a      = 0;
1738   b->slvec1b      = 0;
1739   b->sMvctx       = 0;
1740 
1741   /* stuff for MatGetRow() */
1742   b->rowindices   = 0;
1743   b->rowvalues    = 0;
1744   b->getrowactive = PETSC_FALSE;
1745 
1746   /* hash table stuff */
1747   b->ht           = 0;
1748   b->hd           = 0;
1749   b->ht_size      = 0;
1750   b->ht_flag      = PETSC_FALSE;
1751   b->ht_fact      = 0;
1752   b->ht_total_ct  = 0;
1753   b->ht_insert_ct = 0;
1754 
1755   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1756   if (flg) {
1757     PetscReal fact = 1.39;
1758     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1759     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1760     if (fact <= 1.0) fact = 1.39;
1761     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1762     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table Factor used %5.2f\n",fact);
1763   }
1764   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1765                                      "MatStoreValues_MPISBAIJ",
1766                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1767   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1768                                      "MatRetrieveValues_MPISBAIJ",
1769                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1770   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1771                                      "MatGetDiagonalBlock_MPISBAIJ",
1772                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1773   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1774                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1775                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1776   PetscFunctionReturn(0);
1777 }
1778 EXTERN_C_END
1779 
1780 /*MC
1781    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1782 
1783    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1784    and MATMPISBAIJ otherwise.
1785 
1786    Options Database Keys:
1787 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1788 
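   Notes:
   A sketch of selecting the type at runtime (declarations omitted):

.vb
   ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
   ierr = MatSetFromOptions(A);CHKERRQ(ierr);  /* e.g. run with -mat_type sbaij */
.ve
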
1789   Level: beginner
1790 
1791 .seealso: MatCreateMPISBAIJ(),MATSEQSBAIJ,MATMPISBAIJ
1792 M*/
1793 
1794 EXTERN_C_BEGIN
1795 #undef __FUNCT__
1796 #define __FUNCT__ "MatCreate_SBAIJ"
1797 int MatCreate_SBAIJ(Mat A) {
1798   int ierr,size;
1799 
1800   PetscFunctionBegin;
1801   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1802   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1803   if (size == 1) {
1804     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1805   } else {
1806     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1807   }
1808   PetscFunctionReturn(0);
1809 }
1810 EXTERN_C_END
1811 
1812 #undef __FUNCT__
1813 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1814 /*@C
1815    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1816    the user should preallocate the matrix storage by setting the parameters
1817    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1818    performance can be increased by more than a factor of 50.
1819 
1820    Collective on Mat
1821 
1822    Input Parameters:
1823 +  A - the matrix
1824 .  bs   - size of the blocks
1825 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1826            submatrix  (same for all local rows)
1827 .  d_nnz - array containing the number of block nonzeros in the various block rows
1828            in the upper triangular and diagonal part of the diagonal portion of the local
1829            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1830            for the diagonal entry even if it is zero.
1831 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1832            submatrix (same for all local rows).
1833 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1834            off-diagonal portion of the local submatrix (possibly different for
1835            each block row) or PETSC_NULL.
1836 
1837 
1838    Options Database Keys:
1839 +   -mat_no_unroll - uses code that does not unroll the loops in the
1840                      block calculations (much slower)
1841 -   -mat_block_size - size of the blocks to use
1842 
1843    Notes:
1844 
1845    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1846    then it must be used on all processors that share the object for that argument.
1847 
1848    Storage Information:
1849    For a square global matrix we define each processor's diagonal portion
1850    to be its local rows and the corresponding columns (a square submatrix);
1851    each processor's off-diagonal portion encompasses the remainder of the
1852    local matrix (a rectangular submatrix).
1853 
1854    The user can specify preallocated storage for the diagonal part of
1855    the local submatrix with either d_nz or d_nnz (not both).  Set
1856    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1857    memory allocation.  Likewise, specify preallocated storage for the
1858    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1859 
1860    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1861    the figure below we depict these three local rows and all columns (0-11).
1862 
1863 .vb
1864            0 1 2 3 4 5 6 7 8 9 10 11
1865           -------------------
1866    row 3  |  o o o d d d o o o o o o
1867    row 4  |  o o o d d d o o o o o o
1868    row 5  |  o o o d d d o o o o o o
1869           -------------------
1870 .ve
1871 
1872    Thus, any entries in the d locations are stored in the d (diagonal)
1873    submatrix, and any entries in the o locations are stored in the
1874    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1875    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1876 
1877    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1878    plus the diagonal part of the d matrix,
1879    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1880    In general, for PDE problems in which most nonzeros are near the diagonal,
1881    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1882    or you will get TERRIBLE performance; see the users' manual chapter on
1883    matrices.
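
   A minimal preallocation sketch (the counts 3 and 1 below are illustrative
   only, not a recommendation for any particular problem):

.vb
   /* every local block row: at most 3 blocks in the upper triangular part of
      the diagonal portion (including the diagonal block itself) and at most
      1 block in the off-diagonal portion */
   ierr = MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve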
1884 
1885    Level: intermediate
1886 
1887 .keywords: matrix, block, aij, compressed row, sparse, parallel
1888 
1889 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1890 @*/
1891 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1892 {
1893   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1894 
1895   PetscFunctionBegin;
1896   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1897   if (f) {
1898     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1899   }
1900   PetscFunctionReturn(0);
1901 }
1902 
1903 #undef __FUNCT__
1904 #define __FUNCT__ "MatCreateMPISBAIJ"
1905 /*@C
1906    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1907    (block compressed row).  For good matrix assembly performance
1908    the user should preallocate the matrix storage by setting the parameters
1909    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1910    performance can be increased by more than a factor of 50.
1911 
1912    Collective on MPI_Comm
1913 
1914    Input Parameters:
1915 +  comm - MPI communicator
1916 .  bs   - size of the blocks
1917 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1918            This value should be the same as the local size used in creating the
1919            y vector for the matrix-vector product y = Ax.
1920 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1921            This value should be the same as the local size used in creating the
1922            x vector for the matrix-vector product y = Ax.
1923 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1924 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1925 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1926            submatrix  (same for all local rows)
1927 .  d_nnz - array containing the number of block nonzeros in the various block rows
1928            in the upper triangular portion of the diagonal portion of the local submatrix
1929            (possibly different for each block row) or PETSC_NULL.
1930            You must leave room for the diagonal entry even if it is zero.
1931 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1932            submatrix (same for all local rows).
1933 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1934            off-diagonal portion of the local submatrix (possibly different for
1935            each block row) or PETSC_NULL.
1936 
1937    Output Parameter:
1938 .  A - the matrix
1939 
1940    Options Database Keys:
1941 +   -mat_no_unroll - uses code that does not unroll the loops in the
1942                      block calculations (much slower)
1943 .   -mat_block_size - size of the blocks to use
1944 -   -mat_mpi - use the parallel matrix data structures even on one processor
1945                (defaults to using SeqSBAIJ format on one processor)
1946 
1947    Notes:
1948    The user MUST specify either the local or global matrix dimensions
1949    (possibly both).
1950 
1951    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1952    then it must be used on all processors that share the object for that argument.
1953 
1954    Storage Information:
1955    For a square global matrix we define each processor's diagonal portion
1956    to be its local rows and the corresponding columns (a square submatrix);
1957    each processor's off-diagonal portion encompasses the remainder of the
1958    local matrix (a rectangular submatrix).
1959 
1960    The user can specify preallocated storage for the diagonal part of
1961    the local submatrix with either d_nz or d_nnz (not both).  Set
1962    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1963    memory allocation.  Likewise, specify preallocated storage for the
1964    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1965 
1966    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1967    the figure below we depict these three local rows and all columns (0-11).
1968 
1969 .vb
1970            0 1 2 3 4 5 6 7 8 9 10 11
1971           -------------------
1972    row 3  |  o o o d d d o o o o o o
1973    row 4  |  o o o d d d o o o o o o
1974    row 5  |  o o o d d d o o o o o o
1975           -------------------
1976 .ve
1977 
1978    Thus, any entries in the d locations are stored in the d (diagonal)
1979    submatrix, and any entries in the o locations are stored in the
1980    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1981    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1982 
1983    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1984    plus the diagonal part of the d matrix,
1985    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1986    In general, for PDE problems in which most nonzeros are near the diagonal,
1987    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1988    or you will get TERRIBLE performance; see the users' manual chapter on
1989    matrices.
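
   An illustrative call (block size, global sizes and preallocation counts
   below are examples only):

.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,N,
                            3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
.ve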
1990 
1991    Level: intermediate
1992 
1993 .keywords: matrix, block, aij, compressed row, sparse, parallel
1994 
1995 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1996 @*/
1997 
1998 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
1999 {
2000   int ierr,size;
2001 
2002   PetscFunctionBegin;
2003   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2004   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2005   if (size > 1) {
2006     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2007     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2008   } else {
2009     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2010     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2011   }
2012   PetscFunctionReturn(0);
2013 }
2014 
2015 
2016 #undef __FUNCT__
2017 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2018 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2019 {
2020   Mat          mat;
2021   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2022   int          ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2023   PetscScalar  *array;
2024 
2025   PetscFunctionBegin;
2026   *newmat       = 0;
2027   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2028   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2029   /* ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);  -- cause error? */
2030   mat->factor       = matin->factor;
2031   mat->preallocated = PETSC_TRUE;
2032   mat->assembled    = PETSC_TRUE;
2033   a = (Mat_MPISBAIJ*)mat->data;
2034   a->bs  = oldmat->bs;
2035   a->bs2 = oldmat->bs2;
2036   a->mbs = oldmat->mbs;
2037   a->nbs = oldmat->nbs;
2038   a->Mbs = oldmat->Mbs;
2039   a->Nbs = oldmat->Nbs;
2040 
2041   a->rstart       = oldmat->rstart;
2042   a->rend         = oldmat->rend;
2043   a->cstart       = oldmat->cstart;
2044   a->cend         = oldmat->cend;
2045   a->size         = oldmat->size;
2046   a->rank         = oldmat->rank;
2047   a->donotstash   = oldmat->donotstash;
2048   a->roworiented  = oldmat->roworiented;
2049   a->rowindices   = 0;
2050   a->rowvalues    = 0;
2051   a->getrowactive = PETSC_FALSE;
2052   a->barray       = 0;
2053   a->rstart_bs    = oldmat->rstart_bs;
2054   a->rend_bs      = oldmat->rend_bs;
2055   a->cstart_bs    = oldmat->cstart_bs;
2056   a->cend_bs      = oldmat->cend_bs;
2057 
2058   /* hash table stuff */
2059   a->ht           = 0;
2060   a->hd           = 0;
2061   a->ht_size      = 0;
2062   a->ht_flag      = oldmat->ht_flag;
2063   a->ht_fact      = oldmat->ht_fact;
2064   a->ht_total_ct  = 0;
2065   a->ht_insert_ct = 0;
2066 
2067   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2068   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2069   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2070   if (oldmat->colmap) {
2071 #if defined (PETSC_USE_CTABLE)
2072     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2073 #else
2074     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2075     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2076     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2077 #endif
2078   } else a->colmap = 0;
2079 
2080   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2081     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2082     PetscLogObjectMemory(mat,len*sizeof(int));
2083     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2084   } else a->garray = 0;
2085 
2086   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2087   PetscLogObjectParent(mat,a->lvec);
2088   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2089   PetscLogObjectParent(mat,a->Mvctx);
2090 
2091   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2092   PetscLogObjectParent(mat,a->slvec0);
2093   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2094   PetscLogObjectParent(mat,a->slvec1);
2095 
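  /* slvec1a/slvec1b and slvec0b are not freestanding vectors: they alias the
     local (first bs*mbs entries) and ghost (tail) segments of the slvec1 and
     slvec0 arrays */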
2096   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2097   ierr = VecGetArrayFast(a->slvec1,&array);CHKERRQ(ierr);
2098   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2099   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2100   ierr = VecRestoreArrayFast(a->slvec1,&array);CHKERRQ(ierr);
2101   ierr = VecGetArrayFast(a->slvec0,&array);CHKERRQ(ierr);
2102   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2103   ierr = VecRestoreArrayFast(a->slvec0,&array);CHKERRQ(ierr);
2106   PetscLogObjectParent(mat,a->slvec0b);
2107   PetscLogObjectParent(mat,a->slvec1a);
2108   PetscLogObjectParent(mat,a->slvec1b);
2109 
2110   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2111   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2112   a->sMvctx = oldmat->sMvctx;
2113   PetscLogObjectParent(mat,a->sMvctx);
2114 
2115   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2116   PetscLogObjectParent(mat,a->A);
2117   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2118   PetscLogObjectParent(mat,a->B);
2119   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2120   *newmat = mat;
2121   PetscFunctionReturn(0);
2122 }
2123 
2124 #include "petscsys.h"
2125 
2126 #undef __FUNCT__
2127 #define __FUNCT__ "MatLoad_MPISBAIJ"
2128 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2129 {
2130   Mat          A;
2131   int          i,nz,ierr,j,rstart,rend,fd;
2132   PetscScalar  *vals,*buf;
2133   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2134   MPI_Status   status;
2135   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2136   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2137   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2138   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2139   int          dcount,kmax,k,nzcount,tmp;
2140 
2141   PetscFunctionBegin;
2142   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2143 
2144   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2145   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2146   if (!rank) {
2147     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2148     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2149     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2150     if (header[3] < 0) {
2151       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2152     }
2153   }
2154 
2155   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2156   M = header[1]; N = header[2];
2157 
2158   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2159 
2160   /*
2161      This code adds extra rows to make sure the number of rows is
2162      divisible by the blocksize
2163   */
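  /* Example (illustrative): M = 10, bs = 3 gives Mbs = 3 and
     extra_rows = 3 - 10 + 9 = 2; Mbs is then bumped to 4 so the padded
     matrix is 12 x 12, with unit diagonal entries in the padded rows */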
2164   Mbs        = M/bs;
2165   extra_rows = bs - M + bs*(Mbs);
2166   if (extra_rows == bs) extra_rows = 0;
2167   else                  Mbs++;
2168   if (extra_rows && !rank) {
2169     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2170   }
2171 
2172   /* determine ownership of all rows */
2173   mbs        = Mbs/size + ((Mbs % size) > rank);
2174   m          = mbs*bs;
2175   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2176   browners   = rowners + size + 1;
2177   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2178   rowners[0] = 0;
2179   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2180   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2181   rstart = rowners[rank];
2182   rend   = rowners[rank+1];
2183 
2184   /* distribute row lengths to all processors */
2185   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2186   if (!rank) {
2187     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2188     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2189     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2190     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2191     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2192     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2193     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2194   } else {
2195     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2196   }
2197 
2198   if (!rank) {   /* procs[0] */
2199     /* calculate the number of nonzeros on each processor */
2200     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2201     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2202     for (i=0; i<size; i++) {
2203       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2204         procsnz[i] += rowlengths[j];
2205       }
2206     }
2207     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2208 
2209     /* determine max buffer needed and allocate it */
2210     maxnz = 0;
2211     for (i=0; i<size; i++) {
2212       maxnz = PetscMax(maxnz,procsnz[i]);
2213     }
2214     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2215 
2216     /* read in my part of the matrix column indices  */
2217     nz     = procsnz[0];
2218     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2219     mycols = ibuf;
2220     if (size == 1)  nz -= extra_rows;
2221     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2222     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2223 
2224     /* read in every ones (except the last) and ship off */
2225     for (i=1; i<size-1; i++) {
2226       nz   = procsnz[i];
2227       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2228       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2229     }
2230     /* read in the stuff for the last proc */
2231     if (size != 1) {
2232       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2233       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2234       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2235       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2236     }
2237     ierr = PetscFree(cols);CHKERRQ(ierr);
2238   } else {  /* procs[i], i>0 */
2239     /* determine buffer space needed for message */
2240     nz = 0;
2241     for (i=0; i<m; i++) {
2242       nz += locrowlens[i];
2243     }
2244     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2245     mycols = ibuf;
2246     /* receive message of column indices*/
2247     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2248     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2249     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2250   }
2251 
2252   /* loop over local rows, determining number of off diagonal entries */
2253   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2254   odlens   = dlens + (rend-rstart);
2255   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2256   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2257   masked1  = mask    + Mbs;
2258   masked2  = masked1 + Mbs;
2259   rowcount = 0; nzcount = 0;
2260   for (i=0; i<mbs; i++) {
2261     dcount  = 0;
2262     odcount = 0;
2263     for (j=0; j<bs; j++) {
2264       kmax = locrowlens[rowcount];
2265       for (k=0; k<kmax; k++) {
2266         tmp = mycols[nzcount++]/bs; /* block col. index */
2267         if (!mask[tmp]) {
2268           mask[tmp] = 1;
2269           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2270           else masked1[dcount++] = tmp; /* entry in diag portion */
2271         }
2272       }
2273       rowcount++;
2274     }
2275 
2276     dlens[i]  = dcount;  /* d_nzz[i] */
2277     odlens[i] = odcount; /* o_nzz[i] */
2278 
2279     /* zero out the mask elements we set */
2280     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2281     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2282   }
2283 
2284   /* create our matrix */
2285   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2286   ierr = MatSetType(A,type);CHKERRQ(ierr);
2287   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2288   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2289 
2290   if (!rank) {
2291     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2292     /* read in my part of the matrix numerical values  */
2293     nz = procsnz[0];
2294     vals = buf;
2295     mycols = ibuf;
2296     if (size == 1)  nz -= extra_rows;
2297     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2298     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2299 
2300     /* insert into matrix */
2301     jj      = rstart*bs;
2302     for (i=0; i<m; i++) {
2303       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2304       mycols += locrowlens[i];
2305       vals   += locrowlens[i];
2306       jj++;
2307     }
2308 
2309     /* read in other processors (except the last one) and ship out */
2310     for (i=1; i<size-1; i++) {
2311       nz   = procsnz[i];
2312       vals = buf;
2313       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2314       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2315     }
2316     /* the last proc */
2317     if (size != 1){
2318       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2319       vals = buf;
2320       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2321       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2322       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2323     }
2324     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2325 
2326   } else {
2327     /* receive numeric values */
2328     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2329 
2330     /* receive message of values*/
2331     vals   = buf;
2332     mycols = ibuf;
2333     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2334     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2335     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2336 
2337     /* insert into matrix */
2338     jj      = rstart*bs;
2339     for (i=0; i<m; i++) {
2340       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2341       mycols += locrowlens[i];
2342       vals   += locrowlens[i];
2343       jj++;
2344     }
2345   }
2346 
2347   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2348   ierr = PetscFree(buf);CHKERRQ(ierr);
2349   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2350   ierr = PetscFree(rowners);CHKERRQ(ierr);
2351   ierr = PetscFree(dlens);CHKERRQ(ierr);
2352   ierr = PetscFree(mask);CHKERRQ(ierr);
2353   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2354   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2355   *newmat = A;
2356   PetscFunctionReturn(0);
2357 }
2358 
2359 #undef __FUNCT__
2360 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2361 /*@
2362    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2363 
2364    Input Parameters:
2365 +  mat  - the matrix
2366 -  fact - factor
2367 
2368    Collective on Mat
2369 
2370    Level: advanced
2371 
2372   Notes:
2373    This can also be set by the command line option: -mat_use_hash_table fact
2374 
2375 .keywords: matrix, hashtable, factor, HT
2376 
2377 .seealso: MatSetOption()
2378 @*/
2379 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2380 {
2381   PetscFunctionBegin;
2382   SETERRQ(1,"Function not yet written for SBAIJ format");
2383   /* PetscFunctionReturn(0); */
2384 }
2385 
2386 #undef __FUNCT__
2387 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2388 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2389 {
2390   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2391   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2392   PetscReal    atmp;
2393   PetscReal    *work,*svalues,*rvalues;
2394   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2395   int          rank,size,*rowners_bs,dest,count,source;
2396   PetscScalar  *va;
2397   MatScalar    *ba;
2398   MPI_Status   stat;
2399 
2400   PetscFunctionBegin;
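  /* Only the upper triangle is stored: an entry of the off-diagonal block B at
     (row,col) contributes both to the maximum of the local row and to that of
     global row col, which is owned by a later process; work[] collects the
     column-wise maxima and each segment is shipped to its owner below */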
2401   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2402   ierr = VecGetArrayFast(v,&va);CHKERRQ(ierr);
2403 
2404   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2405   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2406 
2407   bs   = a->bs;
2408   mbs  = a->mbs;
2409   Mbs  = a->Mbs;
2410   ba   = b->a;
2411   bi   = b->i;
2412   bj   = b->j;
2413   /*
2414   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2415   PetscSynchronizedFlush(A->comm);
2416   */
2417 
2418   /* find ownerships */
2419   rowners_bs = a->rowners_bs;
2420   /*
2421   if (!rank){
2422     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2423   }
2424   */
2425 
2426   /* each proc creates an array to be distributed */
2427   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2428   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2429 
2430   /* row_max for B */
2431   if (rank != size-1){
2432     for (i=0; i<mbs; i++) {
2433       ncols = bi[1] - bi[0]; bi++;
2434       brow  = bs*i;
2435       for (j=0; j<ncols; j++){
2436         bcol = bs*(*bj);
2437         for (kcol=0; kcol<bs; kcol++){
2438           col = bcol + kcol;                 /* local col index */
2439           col += rowners_bs[rank+1];      /* global col index */
2440           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2441           for (krow=0; krow<bs; krow++){
2442             atmp = PetscAbsScalar(*ba); ba++;
2443             row = brow + krow;    /* local row index */
2444             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2445             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2446             if (work[col] < atmp) work[col] = atmp;
2447           }
2448         }
2449         bj++;
2450       }
2451     }
2452     /*
2453       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2454       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2455       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2456       */
2457 
2458     /* send values to its owners */
2459     for (dest=rank+1; dest<size; dest++){
2460       svalues = work + rowners_bs[dest];
2461       count   = rowners_bs[dest+1]-rowners_bs[dest];
2462       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2463       /*
2464       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2465       PetscSynchronizedFlush(A->comm);
2466       */
2467     }
2468   }
2469 
2470   /* receive values */
2471   if (rank){
2472     rvalues = work;
2473     count   = rowners_bs[rank+1]-rowners_bs[rank];
2474     for (source=0; source<rank; source++){
2475       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2476       /* process values */
2477       for (i=0; i<count; i++){
2478         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2479       }
2480       /*
2481       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2482       PetscSynchronizedFlush(A->comm);
2483       */
2484     }
2485   }
2486 
2487   ierr = VecRestoreArrayFast(v,&va);CHKERRQ(ierr);
2488   ierr = PetscFree(work);CHKERRQ(ierr);
2489   PetscFunctionReturn(0);
2490 }
2491 
2492 #undef __FUNCT__
2493 #define __FUNCT__ "MatRelax_MPISBAIJ"
2494 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2495 {
2496   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2497   int            ierr,mbs=mat->mbs,bs=mat->bs;
2498   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2499   Vec            bb1;
2500 
2501   PetscFunctionBegin;
2502   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2503   if (bs > 1)
2504     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2505 
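  /* With only the upper triangle stored, the sweep below forms
     bb1 = bb - (B + B^T)*xx using a single forward scatter: slvec0 packs
     [-xx; -B^T*xx], is added into slvec1 = [bb; 0], and one multadd with the
     off-diagonal block B completes bb1 before the purely local relaxation */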
2506   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2507     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2508       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2509       its--;
2510     }
2511 
2512     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2513     while (its--){
2514 
2515       /* lower triangular part: slvec0b = - B^T*xx */
2516       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2517 
2518       /* copy xx into slvec0a */
2519       ierr = VecGetArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr);
2520       ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2521       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2522       ierr = VecRestoreArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr);
2523 
2524       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2525 
2526       /* copy bb into slvec1a */
2527       ierr = VecGetArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr);
2528       ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
2529       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2530       ierr = VecRestoreArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr);
2531 
2532       /* set slvec1b = 0 */
2533       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2534 
2535       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2536       ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2537       ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
2538       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2539 
2540       /* upper triangular part: bb1 = bb1 - B*x */
2541       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2542 
2543       /* local diagonal sweep */
2544       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2545     }
2546     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2547   } else {
2548     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2549   }
2550   PetscFunctionReturn(0);
2551 }
2552 
2553 #undef __FUNCT__
2554 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2555 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2556 {
2557   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2558   int            ierr;
2559   PetscScalar    mone=-1.0;
2560   Vec            lvec1,bb1;
2561 
2562   PetscFunctionBegin;
2563   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2564   if (mat->bs > 1)
2565     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2566 
2567   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2568     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2569       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2570       its--;
2571     }
2572 
2573     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2574     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2575     while (its--){
2576       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2577 
2578       /* lower diagonal part: bb1 = bb - B^T*xx */
2579       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2580       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2581 
2582       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2583       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2584       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2585 
2586       /* upper diagonal part: bb1 = bb1 - B*x */
2587       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2588       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2589 
2590       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2591 
2592       /* diagonal sweep */
2593       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2594     }
2595     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2596     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2597   } else {
2598     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2599   }
2600   PetscFunctionReturn(0);
2601 }
2602 
2603