xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision fcca9d3d5c45103cafcd0f6bc9babe194d935ced)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 extern int MatSetUpMultiply_MPISBAIJ(Mat);
8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
11 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
21 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
27    converts the entries to single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
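
/*
   A minimal sketch (hypothetical MatSetValues_Wrapper, not part of the
   library) of the wrapper pattern above: when PETSC_USE_MAT_SINGLE is
   defined, MatScalar is single precision while PetscScalar is double, so the
   public entry point converts the caller's values before dispatching to the
   corresponding ..._MatScalar() routine.

     int MatSetValues_Wrapper(Mat mat,int m,const int im[],int n,const int in[],
                              const PetscScalar v[],InsertMode addv)
     {
       MatScalar *vsingle;
       int       i,ierr,N = m*n;
       ierr = PetscMalloc(N*sizeof(MatScalar),&vsingle);CHKERRQ(ierr);
       for (i=0; i<N; i++) vsingle[i] = v[i];  // implicit double -> float conversion
       ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
       ierr = PetscFree(vsingle);CHKERRQ(ierr);
       return 0;
     }

   The real MatSetValues_MPISBAIJ() below additionally caches the scratch
   buffer in b->setvaluescopy so it is not reallocated on every call.
*/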
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 int MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   int          ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 int MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   int          ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (int*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqSBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (int*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
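
/*
   How the two insertion macros above work (identical logic for the A and B
   blocks): (1) locate the block row brow = row/bs and its column slice
   rp = aj + ai[brow]; (2) narrow the search range by bisection while
   high-low > 3, then scan linearly for the block column bcol = col/bs;
   (3) if the block exists, add/insert the scalar at offset
   bs2*_i + bs*cidx + ridx; (4) otherwise, if the row is full (nrow >= rmax),
   reallocate i/j/a with CHUNKSIZE extra block slots, shift the tail of the
   row up one slot, and write the new block column and value.

   A standalone sketch of the lookup in steps (1)-(2), assuming a plain
   block-CSR layout with no extra preallocated slots per row:

     int find_block(const int *ai,const int *aj,int brow,int bcol)
     {
       const int *rp = aj + ai[brow];
       int       low = 0,high = ai[brow+1] - ai[brow],t,i;
       while (high-low > 3) {           // bisection over sorted block columns
         t = (low+high)/2;
         if (rp[t] > bcol) high = t; else low = t;
       }
       for (i=low; i<high; i++) {
         if (rp[i] == bcol) return i;   // block found
         if (rp[i] > bcol)  break;      // passed the insertion point
       }
       return -(i+1);                   // not found; encodes insertion point
     }
*/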
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
237   int          ierr,i,N = m*n;
238   MatScalar    *vsingle;
239 
240   PetscFunctionBegin;
241   if (N > b->setvalueslen) {
242     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
243     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
244     b->setvalueslen  = N;
245   }
246   vsingle = b->setvaluescopy;
247 
248   for (i=0; i<N; i++) {
249     vsingle[i] = v[i];
250   }
251   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
258 {
259   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
260   int          ierr,i,N = m*n*b->bs2;
261   MatScalar    *vsingle;
262 
263   PetscFunctionBegin;
264   if (N > b->setvalueslen) {
265     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
266     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
267     b->setvalueslen  = N;
268   }
269   vsingle = b->setvaluescopy;
270   for (i=0; i<N; i++) {
271     vsingle[i] = v[i];
272   }
273   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
279 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
280 {
285   PetscFunctionBegin;
286   SETERRQ(1,"Function not yet written for SBAIJ format");
287   /* PetscFunctionReturn(0); */
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
292 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
293 {
298   PetscFunctionBegin;
299   SETERRQ(1,"Function not yet written for SBAIJ format");
300   /* PetscFunctionReturn(0); */
301 }
302 #endif
303 
304 /* Only add/insert a(i,j) with i<=j (block indices).
305    Any a(i,j) with i>j input by the user is ignored.
306 */
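/*
   A hypothetical caller's view of this restriction: for an MPISBAIJ matrix
   only the upper triangle needs to be set, e.g.

     for (i=rstart; i<rend; i++) {
       for (j=i; j<N; j++) {             // j >= i: upper triangle only
         PetscScalar v = value_of(i,j);  // value_of() is a placeholder
         ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);
       }
     }

   Entries passed with i > j are silently dropped by the routine below.
*/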
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
309 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
310 {
311   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
312   MatScalar    value;
313   PetscTruth   roworiented = baij->roworiented;
314   int          ierr,i,j,row,col;
315   int          rstart_orig=baij->rstart_bs;
316   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
317   int          cend_orig=baij->cend_bs,bs=baij->bs;
318 
319   /* Some Variables required in the macro */
320   Mat          A = baij->A;
321   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
322   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
323   MatScalar    *aa=a->a;
324 
325   Mat          B = baij->B;
326   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
327   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
328   MatScalar    *ba=b->a;
329 
330   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
331   int          low,high,t,ridx,cidx,bs2=a->bs2;
332   MatScalar    *ap,*bap;
333 
334   /* for stash */
335   int          n_loc, *in_loc=0;
336   MatScalar    *v_loc=0;
337 
338   PetscFunctionBegin;
339 
340   if(!baij->donotstash){
341     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
342     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
343   }
344 
345   for (i=0; i<m; i++) {
346     if (im[i] < 0) continue;
347 #if defined(PETSC_USE_BOPT_g)
348     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
349 #endif
350     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
351       row = im[i] - rstart_orig;              /* local row index */
352       for (j=0; j<n; j++) {
353         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
354         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
355           col = in[j] - cstart_orig;          /* local col index */
356           brow = row/bs; bcol = col/bs;
357           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
358           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
359           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
360           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
361         } else if (in[j] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
364 #endif
365         else {  /* off-diag entry (B) */
366           if (mat->was_assembled) {
367             if (!baij->colmap) {
368               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
369             }
370 #if defined (PETSC_USE_CTABLE)
371             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
372             col  = col - 1;
373 #else
374             col = baij->colmap[in[j]/bs] - 1;
375 #endif
376             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
377               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
378               col =  in[j];
379               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
380               B = baij->B;
381               b = (Mat_SeqBAIJ*)(B)->data;
382               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
383               ba=b->a;
384             } else col += in[j]%bs;
385           } else col = in[j];
386           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
387           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
388           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
389         }
390       }
391     } else {  /* off processor entry */
392       if (!baij->donotstash) {
393         n_loc = 0;
394         for (j=0; j<n; j++){
395           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
396           in_loc[n_loc] = in[j];
397           if (roworiented) {
398             v_loc[n_loc] = v[i*n+j];
399           } else {
400             v_loc[n_loc] = v[j*m+i];
401           }
402           n_loc++;
403         }
404         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
405       }
406     }
407   }
408 
409   if(!baij->donotstash){
410     ierr = PetscFree(in_loc);CHKERRQ(ierr);
411     ierr = PetscFree(v_loc);CHKERRQ(ierr);
412   }
413   PetscFunctionReturn(0);
414 }
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
418 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
419 {
420   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
421   const MatScalar *value;
422   MatScalar       *barray=baij->barray;
423   PetscTruth      roworiented = baij->roworiented;
424   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
425   int             rend=baij->rend,cstart=baij->cstart,stepval;
426   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
427 
428   PetscFunctionBegin;
429   if(!barray) {
430     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
431     baij->barray = barray;
432   }
433 
434   if (roworiented) {
435     stepval = (n-1)*bs;
436   } else {
437     stepval = (m-1)*bs;
438   }
439   for (i=0; i<m; i++) {
440     if (im[i] < 0) continue;
441 #if defined(PETSC_USE_BOPT_g)
442     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
443 #endif
444     if (im[i] >= rstart && im[i] < rend) {
445       row = im[i] - rstart;
446       for (j=0; j<n; j++) {
447         /* If only one block column (row oriented) or one block row (column oriented) is being set, a copy is not required */
448         if ((roworiented) && (n == 1)) {
449           barray = (MatScalar*) v + i*bs2;
450         } else if((!roworiented) && (m == 1)) {
451           barray = (MatScalar*) v + j*bs2;
452         } else { /* Here a copy is required */
453           if (roworiented) {
454             value = v + i*(stepval+bs)*bs + j*bs;
455           } else {
456             value = v + j*(stepval+bs)*bs + i*bs;
457           }
458           for (ii=0; ii<bs; ii++,value+=stepval) {
459             for (jj=0; jj<bs; jj++) {
460               *barray++  = *value++;
461             }
462           }
463           barray -=bs2;
464         }
465 
466         if (in[j] >= cstart && in[j] < cend){
467           col  = in[j] - cstart;
468           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
469         }
470         else if (in[j] < 0) continue;
471 #if defined(PETSC_USE_BOPT_g)
472         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
473 #endif
474         else {
475           if (mat->was_assembled) {
476             if (!baij->colmap) {
477               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
478             }
479 
480 #if defined(PETSC_USE_BOPT_g)
481 #if defined (PETSC_USE_CTABLE)
482             { int data;
483               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
484               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
485             }
486 #else
487             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
488 #endif
489 #endif
490 #if defined (PETSC_USE_CTABLE)
491 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
492             col  = (col - 1)/bs;
493 #else
494             col = (baij->colmap[in[j]] - 1)/bs;
495 #endif
496             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
497               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
498               col =  in[j];
499             }
500           }
501           else col = in[j];
502           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
503         }
504       }
505     } else {
506       if (!baij->donotstash) {
507         if (roworiented) {
508           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
509         } else {
510           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
511         }
512       }
513     }
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 #define HASH_KEY 0.6180339887
519 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
520 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
521 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
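/*
   Example of the multiplicative hash above: HASH_KEY is the fractional part
   of the golden ratio, so for size = 97 and key = 123,
     tmp  = 123*0.6180339887 = 76.018...,   tmp - (int)tmp = 0.018...,
     HASH = (int)(97*0.018...) = 1,
   spreading consecutive keys nearly uniformly over [0,size).
*/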
522 #undef __FUNCT__
523 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
524 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
525 {
526   PetscFunctionBegin;
527   SETERRQ(1,"Function not yet written for SBAIJ format");
528   /* PetscFunctionReturn(0); */
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
533 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
534 {
535   PetscFunctionBegin;
536   SETERRQ(1,"Function not yet written for SBAIJ format");
537   /* PetscFunctionReturn(0); */
538 }
539 
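/*
   Only locally owned entries can be retrieved. A usage sketch, assuming the
   hypothetical (row,col) lies in this process's row range:

     int         row = rstart,col = 0;
     PetscScalar val;
     ierr = MatGetValues(mat,1,&row,1,&col,&val);CHKERRQ(ierr);

   Requests for rows owned by another process raise PETSC_ERR_SUP below.
*/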
540 #undef __FUNCT__
541 #define __FUNCT__ "MatGetValues_MPISBAIJ"
542 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
543 {
544   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
545   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
546   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
547 
548   PetscFunctionBegin;
549   for (i=0; i<m; i++) {
550     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
551     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
552     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
553       row = idxm[i] - bsrstart;
554       for (j=0; j<n; j++) {
555         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
556         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
557         if (idxn[j] >= bscstart && idxn[j] < bscend){
558           col = idxn[j] - bscstart;
559           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
560         } else {
561           if (!baij->colmap) {
562             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
563           }
564 #if defined (PETSC_USE_CTABLE)
565           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
566           data --;
567 #else
568           data = baij->colmap[idxn[j]/bs]-1;
569 #endif
570           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
571           else {
572             col  = data + idxn[j]%bs;
573             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
574           }
575         }
576       }
577     } else {
578       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
579     }
580   }
581  PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatNorm_MPISBAIJ"
586 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
587 {
588   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
589   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
590   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
591   int        ierr;
592   PetscReal  sum[2],*lnorm2;
593 
594   PetscFunctionBegin;
595   if (baij->size == 1) {
596     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
597   } else {
598     if (type == NORM_FROBENIUS) {
599       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
600       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
601       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
602       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
603       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
604       /*
605       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
606       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
607       */
608       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
609       /*
610       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
611       PetscSynchronizedFlush(mat->comm); */
612 
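      /* each off-diagonal block of the symmetric matrix is stored only once
         (in B), so its contribution to the squared Frobenius norm counts twice */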
613       *norm = sqrt(sum[0] + 2*sum[1]);
614       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
615     } else {
616       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
617     }
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 /*
623   Creates the hash table, and sets the table.
624   The table is created only once.
625   If new entries need to be added to the matrix
626   then the hash table has to be destroyed and
627   recreated.
628 */
629 #undef __FUNCT__
630 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
631 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
632 {
633   PetscFunctionBegin;
634   SETERRQ(1,"Function not yet written for SBAIJ format");
635   /* PetscFunctionReturn(0); */
636 }
637 
638 #undef __FUNCT__
639 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
640 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
641 {
642   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
643   int         ierr,nstash,reallocs;
644   InsertMode  addv;
645 
646   PetscFunctionBegin;
647   if (baij->donotstash) {
648     PetscFunctionReturn(0);
649   }
650 
651   /* make sure all processors are either in INSERTMODE or ADDMODE */
652   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
653   if (addv == (ADD_VALUES|INSERT_VALUES)) {
654     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
655   }
656   mat->insertmode = addv; /* in case this processor had no cache */
657 
658   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
659   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
660   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
661   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
662   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
663   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
664   PetscFunctionReturn(0);
665 }
666 
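/*
   The standard assembly sequence from a caller's perspective (sketch):

     ierr = MatSetValues(mat,m,im,n,in,v,ADD_VALUES);CHKERRQ(ierr);  // possibly off-process
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin_MPISBAIJ() above starts communicating the stashed
   off-process entries; MatAssemblyEnd_MPISBAIJ() below receives and inserts
   them, then assembles the A and B blocks.
*/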
667 #undef __FUNCT__
668 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
669 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
670 {
671   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
672   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
673   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
674   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
675   int         *row,*col,other_disassembled;
676   PetscTruth  r1,r2,r3;
677   MatScalar   *val;
678   InsertMode  addv = mat->insertmode;
679 
680   PetscFunctionBegin;
681 
682   if (!baij->donotstash) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       /*
686       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
687       PetscSynchronizedFlush(mat->comm);
688       */
689       if (!flg) break;
690 
691       for (i=0; i<n;) {
692         /* Now identify the consecutive vals belonging to the same row */
693         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
694         if (j < n) ncols = j-i;
695         else       ncols = n-i;
696         /* Now assemble all these values with a single function call */
697         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702     /* Now process the block-stash. Since the values are stashed column-oriented,
703        set the roworiented flags to PETSC_FALSE, and restore the original flags
704        after MatSetValues() */
705     r1 = baij->roworiented;
706     r2 = a->roworiented;
707     r3 = b->roworiented;
708     baij->roworiented = PETSC_FALSE;
709     a->roworiented    = PETSC_FALSE;
710     b->roworiented    = PETSC_FALSE;
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
718         if (j < n) ncols = j-i;
719         else       ncols = n-i;
720         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
721         i = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
725     baij->roworiented = r1;
726     a->roworiented    = r2;
727     b->roworiented    = r3;
728   }
729 
730   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
731   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
732 
733   /* determine if any processor has disassembled; if so we must
734      also disassemble ourselves, so that we may reassemble. */
735   /*
736      if the nonzero structure of submatrix B cannot change then we know that
737      no processor disassembled, thus we can skip this stuff
738   */
739   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
740     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
741     if (mat->was_assembled && !other_disassembled) {
742       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
743     }
744   }
745 
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
748   }
749   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
750   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
751 
752 #if defined(PETSC_USE_BOPT_g)
753   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
754     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
755     baij->ht_total_ct  = 0;
756     baij->ht_insert_ct = 0;
757   }
758 #endif
759   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
760     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
761     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
762     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
763   }
764 
765   if (baij->rowvalues) {
766     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
767     baij->rowvalues = 0;
768   }
769 
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
775 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
776 {
777   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
778   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
779   PetscTruth        isascii,isdraw;
780   PetscViewer       sviewer;
781   PetscViewerFormat format;
782 
783   PetscFunctionBegin;
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
786   if (isascii) {
787     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
788     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
789       MatInfo info;
790       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
791       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
793               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
794               baij->bs,(int)info.memory);CHKERRQ(ierr);
795       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
796       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
797       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
798       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
799       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
800       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
801       PetscFunctionReturn(0);
802     } else if (format == PETSC_VIEWER_ASCII_INFO) {
803       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw       draw;
810     PetscTruth isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
813   }
814 
815   if (size == 1) {
816     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
817     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
818   } else {
819     /* assemble the entire matrix onto first processor. */
820     Mat         A;
821     Mat_SeqSBAIJ *Aloc;
822     Mat_SeqBAIJ *Bloc;
823     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
824     MatScalar   *a;
825 
826     if (!rank) {
827       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
828     } else {
829       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
830     }
831     PetscLogObjectParent(mat,A);
832 
833     /* copy over the A part */
834     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
835     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
836     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
837 
838     for (i=0; i<mbs; i++) {
839       rvals[0] = bs*(baij->rstart + i);
840       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
841       for (j=ai[i]; j<ai[i+1]; j++) {
842         col = (baij->cstart+aj[j])*bs;
843         for (k=0; k<bs; k++) {
844           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
845           col++; a += bs;
846         }
847       }
848     }
849     /* copy over the B part */
850     Bloc = (Mat_SeqBAIJ*)baij->B->data;
851     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
852     for (i=0; i<mbs; i++) {
853       rvals[0] = bs*(baij->rstart + i);
854       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
855       for (j=ai[i]; j<ai[i+1]; j++) {
856         col = baij->garray[aj[j]]*bs;
857         for (k=0; k<bs; k++) {
858           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
859           col++; a += bs;
860         }
861       }
862     }
863     ierr = PetscFree(rvals);CHKERRQ(ierr);
864     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
865     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866     /*
867        Everyone has to call MatView() to draw the matrix, since the graphics waits are
868        synchronized across all processors that share the PetscDraw object
869     */
870     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
871     if (!rank) {
872       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
873       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
874     }
875     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
876     ierr = MatDestroy(A);CHKERRQ(ierr);
877   }
878   PetscFunctionReturn(0);
879 }
880 
881 #undef __FUNCT__
882 #define __FUNCT__ "MatView_MPISBAIJ"
883 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
884 {
885   int        ierr;
886   PetscTruth isascii,isdraw,issocket,isbinary;
887 
888   PetscFunctionBegin;
889   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
893   if (isascii || isdraw || issocket || isbinary) {
894     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
895   } else {
896     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
897   }
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatDestroy_MPISBAIJ"
903 int MatDestroy_MPISBAIJ(Mat mat)
904 {
905   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
906   int         ierr;
907 
908   PetscFunctionBegin;
909 #if defined(PETSC_USE_LOG)
910   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
911 #endif
912   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
913   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
914   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
915   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
916   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
917 #if defined (PETSC_USE_CTABLE)
918   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
919 #else
920   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
921 #endif
922   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
923   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
924   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
925   if (baij->slvec0) {
926     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
927     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
928   }
929   if (baij->slvec1) {
930     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
931     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
932     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
933   }
934   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
935   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
936   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
937   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
938 #if defined(PETSC_USE_MAT_SINGLE)
939   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
940 #endif
941   ierr = PetscFree(baij);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944 
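/*
   MatMult for the symmetric format (a sketch of the algorithm): only the
   upper triangle is stored, so y = mat*x is assembled from three pieces,

     y = A*x                    (local diagonal block)
       + B*x_ghost              (superdiagonal blocks times scattered ghost x)
       + scatter-add of B^T*x   (the implicitly stored subdiagonal blocks),

   where slvec0 packs the local x together with B^T*x so that a single
   scatter (sMvctx) delivers both the ghost values of x (into slvec1b) and
   the remote subdiagonal contributions (added into slvec1a).
*/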
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMult_MPISBAIJ"
947 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
948 {
949   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
950   int         ierr,nt,mbs=a->mbs,bs=a->bs;
951   PetscScalar *x,*from,zero=0.0;
952 
953   PetscFunctionBegin;
954   /*
955   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
956   PetscSynchronizedFlush(A->comm);
957   */
958   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
959   if (nt != A->n) {
960     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
961   }
962   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
963   if (nt != A->m) {
964     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
965   }
966 
967   /* diagonal part */
968   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
969   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
970 
971   /* subdiagonal part */
972   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
973 
974   /* copy x into the vec slvec0 */
975   ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr);
976   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
977   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
978   ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr);
979 
980   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
981   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
982   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
983 
984   /* superdiagonal part */
985   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
986 
987   PetscFunctionReturn(0);
988 }
989 
990 #undef __FUNCT__
991 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
992 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
993 {
994   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
995   int         ierr,nt;
996 
997   PetscFunctionBegin;
998   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
999   if (nt != A->n) {
1000     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1001   }
1002   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1003   if (nt != A->m) {
1004     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1005   }
1006 
1007   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1008   /* do diagonal part */
1009   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1010   /* do superdiagonal part */
1011   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1012   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1013   /* do subdiagonal part */
1014   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1015   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1016   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1017 
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1023 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1024 {
1025   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1026   int          ierr,mbs=a->mbs,bs=a->bs;
1027   PetscScalar  *x,*from,zero=0.0;
1028 
1029   PetscFunctionBegin;
1030   /*
1031   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1032   PetscSynchronizedFlush(A->comm);
1033   */
1034   /* diagonal part */
1035   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1036   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1037 
1038   /* subdiagonal part */
1039   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1040 
1041   /* copy x into the vec slvec0 */
1042   ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr);
1043   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1044   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1045   ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr);
1046 
1047   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1048   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1049   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1050 
1051   /* superdiagonal part */
1052   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1053 
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1059 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1060 {
1061   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1062   int        ierr;
1063 
1064   PetscFunctionBegin;
1065   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1066   /* do diagonal part */
1067   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1068   /* do superdiagonal part */
1069   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1070   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1071 
1072   /* do subdiagonal part */
1073   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1074   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1075   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1076 
1077   PetscFunctionReturn(0);
1078 }
1079 
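/* Because the matrix is symmetric, the transpose operations below simply
   forward to MatMult() and MatMultAdd(). */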
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1082 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1083 {
1084   int ierr;
1085 
1086   PetscFunctionBegin;
1087   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1093 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1094 {
1095   int ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 /*
1103   This only works correctly for square matrices where the subblock A->A is the
1104    diagonal block
1105 */
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1108 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1109 {
1110   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1111   int         ierr;
1112 
1113   PetscFunctionBegin;
1114   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1115   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatScale_MPISBAIJ"
1121 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1122 {
1123   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1124   int         ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1128   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1134 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1135 {
1136   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1137   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1138   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1139   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1140   int            *cmap,*idx_p,cstart = mat->cstart;
1141 
1142   PetscFunctionBegin;
1143   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1144   mat->getrowactive = PETSC_TRUE;
1145 
1146   if (!mat->rowvalues && (idx || v)) {
1147     /*
1148         allocate enough space to hold information from the longest row.
1149     */
1150     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1151     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1152     int     max = 1,mbs = mat->mbs,tmp;
1153     for (i=0; i<mbs; i++) {
1154       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1155       if (max < tmp) { max = tmp; }
1156     }
1157     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1158     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1159   }
1160 
1161   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1162   lrow = row - brstart;  /* local row index */
1163 
1164   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1165   if (!v)   {pvA = 0; pvB = 0;}
1166   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1167   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1168   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1169   nztot = nzA + nzB;
1170 
1171   cmap  = mat->garray;
1172   if (v  || idx) {
1173     if (nztot) {
1174       /* Sort by increasing column numbers, assuming A and B already sorted */
1175       int imark = -1;
1176       if (v) {
1177         *v = v_p = mat->rowvalues;
1178         for (i=0; i<nzB; i++) {
1179           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1180           else break;
1181         }
1182         imark = i;
1183         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1184         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1185       }
1186       if (idx) {
1187         *idx = idx_p = mat->rowindices;
1188         if (imark > -1) {
1189           for (i=0; i<imark; i++) {
1190             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1191           }
1192         } else {
1193           for (i=0; i<nzB; i++) {
1194             if (cmap[cworkB[i]/bs] < cstart)
1195               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1196             else break;
1197           }
1198           imark = i;
1199         }
1200         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1201         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1202       }
1203     } else {
1204       if (idx) *idx = 0;
1205       if (v)   *v   = 0;
1206     }
1207   }
1208   *nz = nztot;
1209   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1210   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1216 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1217 {
1218   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1219 
1220   PetscFunctionBegin;
1221   if (baij->getrowactive == PETSC_FALSE) {
1222     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1223   }
1224   baij->getrowactive = PETSC_FALSE;
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1230 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1231 {
1232   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1233 
1234   PetscFunctionBegin;
1235   *bs = baij->bs;
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1241 int MatZeroEntries_MPISBAIJ(Mat A)
1242 {
1243   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1244   int         ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1248   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1254 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1255 {
1256   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1257   Mat         A = a->A,B = a->B;
1258   int         ierr;
1259   PetscReal   isend[5],irecv[5];
1260 
1261   PetscFunctionBegin;
1262   info->block_size     = (PetscReal)a->bs;
1263   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1264   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1265   isend[3] = info->memory;  isend[4] = info->mallocs;
1266   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1267   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1268   isend[3] += info->memory;  isend[4] += info->mallocs;
1269   if (flag == MAT_LOCAL) {
1270     info->nz_used      = isend[0];
1271     info->nz_allocated = isend[1];
1272     info->nz_unneeded  = isend[2];
1273     info->memory       = isend[3];
1274     info->mallocs      = isend[4];
1275   } else if (flag == MAT_GLOBAL_MAX) {
1276     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1277     info->nz_used      = irecv[0];
1278     info->nz_allocated = irecv[1];
1279     info->nz_unneeded  = irecv[2];
1280     info->memory       = irecv[3];
1281     info->mallocs      = irecv[4];
1282   } else if (flag == MAT_GLOBAL_SUM) {
1283     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1284     info->nz_used      = irecv[0];
1285     info->nz_allocated = irecv[1];
1286     info->nz_unneeded  = irecv[2];
1287     info->memory       = irecv[3];
1288     info->mallocs      = irecv[4];
1289   } else {
1290     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1291   }
1292   info->rows_global       = (PetscReal)A->M;
1293   info->columns_global    = (PetscReal)A->N;
1294   info->rows_local        = (PetscReal)A->m;
1295   info->columns_local     = (PetscReal)A->N;
1296   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1297   info->fill_ratio_needed = 0;
1298   info->factor_mallocs    = 0;
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1304 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1305 {
1306   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1307   int         ierr;
1308 
1309   PetscFunctionBegin;
1310   switch (op) {
1311   case MAT_NO_NEW_NONZERO_LOCATIONS:
1312   case MAT_YES_NEW_NONZERO_LOCATIONS:
1313   case MAT_COLUMNS_UNSORTED:
1314   case MAT_COLUMNS_SORTED:
1315   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1316   case MAT_KEEP_ZEROED_ROWS:
1317   case MAT_NEW_NONZERO_LOCATION_ERR:
1318     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1319     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1320     break;
1321   case MAT_ROW_ORIENTED:
1322     a->roworiented = PETSC_TRUE;
1323     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1324     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1325     break;
1326   case MAT_ROWS_SORTED:
1327   case MAT_ROWS_UNSORTED:
1328   case MAT_YES_NEW_DIAGONALS:
1329     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1330     break;
1331   case MAT_COLUMN_ORIENTED:
1332     a->roworiented = PETSC_FALSE;
1333     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1334     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1335     break;
1336   case MAT_IGNORE_OFF_PROC_ENTRIES:
1337     a->donotstash = PETSC_TRUE;
1338     break;
1339   case MAT_NO_NEW_DIAGONALS:
1340     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1341   case MAT_USE_HASH_TABLE:
1342     a->ht_flag = PETSC_TRUE;
1343     break;
1344   case MAT_NOT_SYMMETRIC:
1345   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1346   case MAT_HERMITIAN:
1347     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1348   case MAT_SYMMETRIC:
1349   case MAT_STRUCTURALLY_SYMMETRIC:
1350   case MAT_NOT_HERMITIAN:
1351   case MAT_SYMMETRY_ETERNAL:
1352   case MAT_NOT_SYMMETRY_ETERNAL:
1353     break;
1354   default:
1355     SETERRQ(PETSC_ERR_SUP,"unknown option");
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1362 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1363 {
1364   int ierr;
1365   PetscFunctionBegin;
1366   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
1369 
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1372 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1373 {
1374   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1375   Mat         a = baij->A,b = baij->B;
1376   int         ierr,s1,s2,s3;
1377 
1378   PetscFunctionBegin;
1379   if (ll != rr) {
1380     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1381   }
1382   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1383   if (rr) {
1384     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1385     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1386     /* Overlap communication with computation. */
1387     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1388     /* ll == rr here, so check ll and left-scale the off-diagonal block while the scatter proceeds */
1389     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1390     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1391     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1392 
1393     /* scale the diagonal block */
1394     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1395 
1396     /* complete the scatter, then right-scale the off-diagonal block with
1397        the now-available ghost values */
1398     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1399     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1400   }
1401 
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1407 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1408 {
1409   PetscFunctionBegin;
1410   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1415 int MatPrintHelp_MPISBAIJ(Mat A)
1416 {
1417   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1418   MPI_Comm    comm = A->comm;
1419   static int  called = 0;
1420   int         ierr;
1421 
1422   PetscFunctionBegin;
1423   if (!a->rank) {
1424     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1425   }
1426   if (called) {PetscFunctionReturn(0);} else called = 1;
1427   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1428   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1434 int MatSetUnfactored_MPISBAIJ(Mat A)
1435 {
1436   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1437   int         ierr;
1438 
1439   PetscFunctionBegin;
1440   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatEqual_MPISBAIJ"
1448 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1449 {
1450   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1451   Mat         a,b,c,d;
1452   PetscTruth  flg;
1453   int         ierr;
1454 
1455   PetscFunctionBegin;
1456   a = matA->A; b = matA->B;
1457   c = matB->A; d = matB->B;
1458 
1459   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1460   if (flg == PETSC_TRUE) {
1461     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1462   }
1463   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1469 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1470 {
1471   int        ierr;
1472 
1473   PetscFunctionBegin;
1474   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1480 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1481 {
1482   int        i,ierr;
1483   PetscTruth flg;
1484   PetscFunctionBegin;
1485   for (i=0; i<n; i++) {
1486     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1487     if (!flg) {
1488       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1489     }
1490   }
1491   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 
1496 /* -------------------------------------------------------------------*/
1497 static struct _MatOps MatOps_Values = {
1498        MatSetValues_MPISBAIJ,
1499        MatGetRow_MPISBAIJ,
1500        MatRestoreRow_MPISBAIJ,
1501        MatMult_MPISBAIJ,
1502 /* 4*/ MatMultAdd_MPISBAIJ,
1503        MatMultTranspose_MPISBAIJ,
1504        MatMultTransposeAdd_MPISBAIJ,
1505        0,
1506        0,
1507        0,
1508 /*10*/ 0,
1509        0,
1510        0,
1511        MatRelax_MPISBAIJ,
1512        MatTranspose_MPISBAIJ,
1513 /*15*/ MatGetInfo_MPISBAIJ,
1514        MatEqual_MPISBAIJ,
1515        MatGetDiagonal_MPISBAIJ,
1516        MatDiagonalScale_MPISBAIJ,
1517        MatNorm_MPISBAIJ,
1518 /*20*/ MatAssemblyBegin_MPISBAIJ,
1519        MatAssemblyEnd_MPISBAIJ,
1520        0,
1521        MatSetOption_MPISBAIJ,
1522        MatZeroEntries_MPISBAIJ,
1523 /*25*/ MatZeroRows_MPISBAIJ,
1524        0,
1525        0,
1526        0,
1527        0,
1528 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*35*/ MatDuplicate_MPISBAIJ,
1534        0,
1535        0,
1536        0,
1537        0,
1538 /*40*/ 0,
1539        MatGetSubMatrices_MPISBAIJ,
1540        0,
1541        MatGetValues_MPISBAIJ,
1542        0,
1543 /*45*/ MatPrintHelp_MPISBAIJ,
1544        MatScale_MPISBAIJ,
1545        0,
1546        0,
1547        0,
1548 /*50*/ MatGetBlockSize_MPISBAIJ,
1549        0,
1550        0,
1551        0,
1552        0,
1553 /*55*/ 0,
1554        0,
1555        MatSetUnfactored_MPISBAIJ,
1556        0,
1557        MatSetValuesBlocked_MPISBAIJ,
1558 /*60*/ 0,
1559        0,
1560        0,
1561        MatGetPetscMaps_Petsc,
1562        0,
1563 /*65*/ 0,
1564        0,
1565        0,
1566        0,
1567        0,
1568 /*70*/ MatGetRowMax_MPISBAIJ,
1569        0,
1570        0,
1571        0,
1572        0,
1573 /*75*/ 0,
1574        0,
1575        0,
1576        0,
1577        0,
1578 /*80*/ 0,
1579        0,
1580        0,
1581        0,
1582 /*85*/ MatLoad_MPISBAIJ
1583 };
1584 
1585 
1586 EXTERN_C_BEGIN
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1589 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1590 {
1591   PetscFunctionBegin;
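  /* hand back a reference to the local diagonal block; iscopy = PETSC_FALSE
     tells the caller that no copy was made and the block must not be destroyed */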
1592   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1593   *iscopy = PETSC_FALSE;
1594   PetscFunctionReturn(0);
1595 }
1596 EXTERN_C_END
1597 
1598 EXTERN_C_BEGIN
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1601 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1602 {
1603   Mat_MPISBAIJ *b;
1604   int          ierr,i,mbs,Mbs;
1605 
1606   PetscFunctionBegin;
1607   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1608 
1609   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1610   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1611   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1612   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1613   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1614   if (d_nnz) {
1615     for (i=0; i<B->m/bs; i++) {
1616       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1617     }
1618   }
1619   if (o_nnz) {
1620     for (i=0; i<B->m/bs; i++) {
1621       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1622     }
1623   }
1624   B->preallocated = PETSC_TRUE;
1625   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1626   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1627   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1628   ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1629 
1630   b   = (Mat_MPISBAIJ*)B->data;
1631   mbs = B->m/bs;
1632   Mbs = B->M/bs;
1633   if (mbs*bs != B->m) {
1634     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1635   }
1636 
1637   b->bs  = bs;
1638   b->bs2 = bs*bs;
1639   b->mbs = mbs;
1640   b->nbs = mbs;
1641   b->Mbs = Mbs;
1642   b->Nbs = Mbs;
1643 
1644   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
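  /* turn the gathered per-process block counts into a prefix sum so that
     block rows rowners[p] through rowners[p+1]-1 belong to process p */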
1645   b->rowners[0]    = 0;
1646   for (i=2; i<=b->size; i++) {
1647     b->rowners[i] += b->rowners[i-1];
1648   }
1649   b->rstart    = b->rowners[b->rank];
1650   b->rend      = b->rowners[b->rank+1];
1651   b->cstart    = b->rstart;
1652   b->cend      = b->rend;
1653   for (i=0; i<=b->size; i++) {
1654     b->rowners_bs[i] = b->rowners[i]*bs;
1655   }
1656   b->rstart_bs = b->rstart*bs;
1657   b->rend_bs   = b->rend*bs;
1658 
1659   b->cstart_bs = b->cstart*bs;
1660   b->cend_bs   = b->cend*bs;
1661 
1662 
1663   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1664   PetscLogObjectParent(B,b->A);
1665   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1666   PetscLogObjectParent(B,b->B);
1667 
1668   /* build cache for off array entries formed */
1669   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1670 
1671   PetscFunctionReturn(0);
1672 }
1673 EXTERN_C_END
1674 
1675 /*MC
1676    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1677    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1678 
1679    Options Database Keys:
1680 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
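
   Example (an illustrative sketch; the sizes and preallocation figures are made up):
.vb
   Mat A;
   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);
   ierr = MatSetType(A,MATMPISBAIJ);
   ierr = MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
.ve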
1681 
1682   Level: beginner
1683 
1684 .seealso: MatCreateMPISBAIJ
1685 M*/
1686 
1687 EXTERN_C_BEGIN
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatCreate_MPISBAIJ"
1690 int MatCreate_MPISBAIJ(Mat B)
1691 {
1692   Mat_MPISBAIJ *b;
1693   int          ierr;
1694   PetscTruth   flg;
1695 
1696   PetscFunctionBegin;
1697 
1698   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1699   B->data = (void*)b;
1700   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1701   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1702 
1703   B->ops->destroy    = MatDestroy_MPISBAIJ;
1704   B->ops->view       = MatView_MPISBAIJ;
1705   B->mapping    = 0;
1706   B->factor     = 0;
1707   B->assembled  = PETSC_FALSE;
1708 
1709   B->insertmode = NOT_SET_VALUES;
1710   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1711   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1712 
1713   /* build local table of row and column ownerships */
1714   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1715   b->cowners    = b->rowners + b->size + 2;
1716   b->rowners_bs = b->cowners + b->size + 2;
1717   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1718 
1719   /* build stash cache for off-processor entries formed during assembly */
1720   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1721   b->donotstash  = PETSC_FALSE;
1722   b->colmap      = PETSC_NULL;
1723   b->garray      = PETSC_NULL;
1724   b->roworiented = PETSC_TRUE;
1725 
1726 #if defined(PETSC_USE_MAT_SINGLE)
1727   /* stuff for MatSetValues_XXX in single precision */
1728   b->setvalueslen     = 0;
1729   b->setvaluescopy    = PETSC_NULL;
1730 #endif
1731 
1732   /* stuff used in block assembly */
1733   b->barray       = 0;
1734 
1735   /* stuff used for matrix vector multiply */
1736   b->lvec         = 0;
1737   b->Mvctx        = 0;
1738   b->slvec0       = 0;
1739   b->slvec0b      = 0;
1740   b->slvec1       = 0;
1741   b->slvec1a      = 0;
1742   b->slvec1b      = 0;
1743   b->sMvctx       = 0;
1744 
1745   /* stuff for MatGetRow() */
1746   b->rowindices   = 0;
1747   b->rowvalues    = 0;
1748   b->getrowactive = PETSC_FALSE;
1749 
1750   /* hash table stuff */
1751   b->ht           = 0;
1752   b->hd           = 0;
1753   b->ht_size      = 0;
1754   b->ht_flag      = PETSC_FALSE;
1755   b->ht_fact      = 0;
1756   b->ht_total_ct  = 0;
1757   b->ht_insert_ct = 0;
1758 
1759   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1760   if (flg) {
1761     PetscReal fact = 1.39;
1762     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1763     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1764     if (fact <= 1.0) fact = 1.39;
1765     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1766     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1767   }
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1769                                      "MatStoreValues_MPISBAIJ",
1770                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1772                                      "MatRetrieveValues_MPISBAIJ",
1773                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1775                                      "MatGetDiagonalBlock_MPISBAIJ",
1776                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1778                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1779                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1780   PetscFunctionReturn(0);
1781 }
1782 EXTERN_C_END
1783 
1784 /*MC
1785    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1786 
1787    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1788    and MATMPISBAIJ otherwise.
1789 
1790    Options Database Keys:
1791 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
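
   Example (an illustrative sketch; the communicator size picks the concrete type):
.vb
   ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,&A);
   ierr = MatSetType(A,MATSBAIJ);  /* MATSEQSBAIJ on one process, MATMPISBAIJ otherwise */
.ve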
1792 
1793   Level: beginner
1794 
1795 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1796 M*/
1797 
1798 EXTERN_C_BEGIN
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatCreate_SBAIJ"
1801 int MatCreate_SBAIJ(Mat A)
{
1802   int ierr,size;
1803 
1804   PetscFunctionBegin;
1805   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1806   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1807   if (size == 1) {
1808     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1809   } else {
1810     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1811   }
1812   PetscFunctionReturn(0);
1813 }
1814 EXTERN_C_END
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1818 /*@C
1819    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1820    the user should preallocate the matrix storage by setting the parameters
1821    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1822    performance can be increased by more than a factor of 50.
1823 
1824    Collective on Mat
1825 
1826    Input Parameters:
1827 +  A - the matrix
1828 .  bs   - size of block
1829 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1830            submatrix  (same for all local rows)
1831 .  d_nnz - array containing the number of block nonzeros in the various block rows
1832            in the upper triangular and diagonal part of the diagonal portion of the
1833            local submatrix (possibly different for each block row) or PETSC_NULL.
1834            You must leave room for the diagonal entry even if it is zero.
1835 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1836            submatrix (same for all local rows).
1837 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1838            off-diagonal portion of the local submatrix (possibly different for
1839            each block row) or PETSC_NULL.
1840 
1841 
1842    Options Database Keys:
1843 +   -mat_no_unroll - uses code that does not unroll the loops in the
1844                      block calculations (much slower)
1845 -   -mat_block_size - size of the blocks to use
1846 
1847    Notes:
1848 
1849    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1850    then it must be used on all processors that share the object for that argument.
1851 
1852    Storage Information:
1853    For a square global matrix we define each processor's diagonal portion
1854    to be its local rows and the corresponding columns (a square submatrix);
1855    each processor's off-diagonal portion encompasses the remainder of the
1856    local matrix (a rectangular submatrix).
1857 
1858    The user can specify preallocated storage for the diagonal part of
1859    the local submatrix with either d_nz or d_nnz (not both).  Set
1860    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1861    memory allocation.  Likewise, specify preallocated storage for the
1862    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1863 
1864    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1865    the figure below we depict these three local rows and all columns (0-11).
1866 
1867 .vb
1868            0 1 2 3 4 5 6 7 8 9 10 11
1869           -------------------
1870    row 3  |  o o o d d d o o o o o o
1871    row 4  |  o o o d d d o o o o o o
1872    row 5  |  o o o d d d o o o o o o
1873           -------------------
1874 .ve
1875 
1876    Thus, any entries in the d locations are stored in the d (diagonal)
1877    submatrix, and any entries in the o locations are stored in the
1878    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1879    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1880 
1881    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1882    plus the diagonal part of the d matrix,
1883    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1884    In general, for PDE problems in which most nonzeros are near the diagonal,
1885    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1886    or you will get TERRIBLE performance; see the users' manual chapter on
1887    matrices.
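
   As a worked example from the figure above (assuming bs=1 and that every
   depicted d and o location is actually a nonzero): only upper triangular
   entries are stored, so row 3 keeps columns 3,4,5 of the d block, row 4
   keeps columns 4,5, and row 5 keeps column 5, while columns 6-11 of the o
   block are kept for all three rows.  The owning processor would preallocate with
.vb
   int d_nnz[] = {3,2,1},o_nnz[] = {6,6,6};
   ierr = MatMPISBAIJSetPreallocation(A,1,0,d_nnz,0,o_nnz);
.ve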
1888 
1889    Level: intermediate
1890 
1891 .keywords: matrix, block, aij, compressed row, sparse, parallel
1892 
1893 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1894 @*/
1895 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1896 {
1897   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1898 
1899   PetscFunctionBegin;
1900   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1901   if (f) {
1902     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1903   }
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatCreateMPISBAIJ"
1909 /*@C
1910    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1911    (block compressed row).  For good matrix assembly performance
1912    the user should preallocate the matrix storage by setting the parameters
1913    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1914    performance can be increased by more than a factor of 50.
1915 
1916    Collective on MPI_Comm
1917 
1918    Input Parameters:
1919 +  comm - MPI communicator
1920 .  bs   - size of block
1921 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1922            This value should be the same as the local size used in creating the
1923            y vector for the matrix-vector product y = Ax.
1924 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1925            This value should be the same as the local size used in creating the
1926            x vector for the matrix-vector product y = Ax.
1927 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1928 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1929 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1930            submatrix  (same for all local rows)
1931 .  d_nnz - array containing the number of block nonzeros in the various block rows
1932            in the upper triangular portion of the diagonal portion of the local
1933            submatrix (possibly different for each block row) or PETSC_NULL.
1934            You must leave room for the diagonal entry even if it is zero.
1935 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1936            submatrix (same for all local rows).
1937 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1938            off-diagonal portion of the local submatrix (possibly different for
1939            each block row) or PETSC_NULL.
1940 
1941    Output Parameter:
1942 .  A - the matrix
1943 
1944    Options Database Keys:
1945 +   -mat_no_unroll - uses code that does not unroll the loops in the
1946                      block calculations (much slower)
1947 .   -mat_block_size - size of the blocks to use
1948 -   -mat_mpi - use the parallel matrix data structures even on one processor
1949                (defaults to using SeqSBAIJ format on one processor)
1950 
1951    Notes:
1952    The user MUST specify either the local or global matrix dimensions
1953    (possibly both).
1954 
1955    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1956    then it must be used on all processors that share the object for that argument.
1957 
1958    Storage Information:
1959    For a square global matrix we define each processor's diagonal portion
1960    to be its local rows and the corresponding columns (a square submatrix);
1961    each processor's off-diagonal portion encompasses the remainder of the
1962    local matrix (a rectangular submatrix).
1963 
1964    The user can specify preallocated storage for the diagonal part of
1965    the local submatrix with either d_nz or d_nnz (not both).  Set
1966    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1967    memory allocation.  Likewise, specify preallocated storage for the
1968    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1969 
1970    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1971    the figure below we depict these three local rows and all columns (0-11).
1972 
1973 .vb
1974            0 1 2 3 4 5 6 7 8 9 10 11
1975           -------------------
1976    row 3  |  o o o d d d o o o o o o
1977    row 4  |  o o o d d d o o o o o o
1978    row 5  |  o o o d d d o o o o o o
1979           -------------------
1980 .ve
1981 
1982    Thus, any entries in the d locations are stored in the d (diagonal)
1983    submatrix, and any entries in the o locations are stored in the
1984    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1985    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1986 
1987    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1988    plus the diagonal part of the d matrix,
1989    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1990    In general, for PDE problems in which most nonzeros are near the diagonal,
1991    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1992    or you will get TERRIBLE performance; see the users' manual chapter on
1993    matrices.
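
   Using the same figure, the processor owning rows 3-5 of a 12-by-12 matrix
   distributed over four processes could create the matrix directly with (a
   worked sketch assuming bs=1 and that every depicted d and o location is a
   nonzero):
.vb
   int d_nnz[] = {3,2,1},o_nnz[] = {6,6,6};
   ierr = MatCreateMPISBAIJ(comm,1,3,3,12,12,0,d_nnz,0,o_nnz,&A);
.ve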
1994 
1995    Level: intermediate
1996 
1997 .keywords: matrix, block, aij, compressed row, sparse, parallel
1998 
1999 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2000 @*/
2001 
2002 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2003 {
2004   int ierr,size;
2005 
2006   PetscFunctionBegin;
2007   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2008   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2009   if (size > 1) {
2010     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2011     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2012   } else {
2013     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2014     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2022 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2023 {
2024   Mat          mat;
2025   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2026   int          ierr,len=0;
2027 
2028   PetscFunctionBegin;
2029   *newmat       = 0;
2030   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2031   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2032   mat->preallocated = PETSC_TRUE;
2033   a = (Mat_MPISBAIJ*)mat->data;
2034   a->bs  = oldmat->bs;
2035   a->bs2 = oldmat->bs2;
2036   a->mbs = oldmat->mbs;
2037   a->nbs = oldmat->nbs;
2038   a->Mbs = oldmat->Mbs;
2039   a->Nbs = oldmat->Nbs;
2040 
2041   a->rstart       = oldmat->rstart;
2042   a->rend         = oldmat->rend;
2043   a->cstart       = oldmat->cstart;
2044   a->cend         = oldmat->cend;
2045   a->size         = oldmat->size;
2046   a->rank         = oldmat->rank;
2047   a->donotstash   = oldmat->donotstash;
2048   a->roworiented  = oldmat->roworiented;
2049   a->rowindices   = 0;
2050   a->rowvalues    = 0;
2051   a->getrowactive = PETSC_FALSE;
2052   a->barray       = 0;
2053   a->rstart_bs    = oldmat->rstart_bs;
2054   a->rend_bs      = oldmat->rend_bs;
2055   a->cstart_bs    = oldmat->cstart_bs;
2056   a->cend_bs      = oldmat->cend_bs;
2057 
2058   /* hash table stuff */
2059   a->ht           = 0;
2060   a->hd           = 0;
2061   a->ht_size      = 0;
2062   a->ht_flag      = oldmat->ht_flag;
2063   a->ht_fact      = oldmat->ht_fact;
2064   a->ht_total_ct  = 0;
2065   a->ht_insert_ct = 0;
2066 
2067   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2068   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2069   a->cowners    = a->rowners + a->size + 2;
2070   a->rowners_bs = a->cowners + a->size + 2;
2071   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2072   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2073   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2074   if (oldmat->colmap) {
2075 #if defined (PETSC_USE_CTABLE)
2076     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2077 #else
2078     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2079     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2080     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2081 #endif
2082   } else a->colmap = 0;
2083   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2084     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2085     PetscLogObjectMemory(mat,len*sizeof(int));
2086     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2087   } else a->garray = 0;
2088 
2089   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2090   PetscLogObjectParent(mat,a->lvec);
2091   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2092 
2093   PetscLogObjectParent(mat,a->Mvctx);
2094   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2095   PetscLogObjectParent(mat,a->A);
2096   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2097   PetscLogObjectParent(mat,a->B);
2098   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2099   *newmat = mat;
2100   PetscFunctionReturn(0);
2101 }
2102 
2103 #include "petscsys.h"
2104 
2105 #undef __FUNCT__
2106 #define __FUNCT__ "MatLoad_MPISBAIJ"
2107 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2108 {
2109   Mat          A;
2110   int          i,nz,ierr,j,rstart,rend,fd;
2111   PetscScalar  *vals,*buf;
2112   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2113   MPI_Status   status;
2114   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2115   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2116   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2117   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2118   int          dcount,kmax,k,nzcount,tmp;
2119 
2120   PetscFunctionBegin;
2121   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2122 
2123   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2124   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2125   if (!rank) {
2126     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2127     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2128     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2129     if (header[3] < 0) {
2130       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2131     }
2132   }
2133 
2134   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2135   M = header[1]; N = header[2];
2136 
2137   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2138 
2139   /*
2140      This code adds extra rows to make sure the number of rows is
2141      divisible by the blocksize
2142   */
2143   Mbs        = M/bs;
2144   extra_rows = bs - M + bs*(Mbs);
2145   if (extra_rows == bs) extra_rows = 0;
2146   else                  Mbs++;
2147   if (extra_rows && !rank) {
2148     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2149   }
2150 
2151   /* determine ownership of all rows */
2152   mbs        = Mbs/size + ((Mbs % size) > rank);
2153   m          = mbs*bs;
2154   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2155   browners   = rowners + size + 1;
2156   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2157   rowners[0] = 0;
2158   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2159   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2160   rstart = rowners[rank];
2161   rend   = rowners[rank+1];
2162 
2163   /* distribute row lengths to all processors */
2164   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2165   if (!rank) {
2166     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2167     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2168     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2169     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2170     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2171     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2172     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2173   } else {
2174     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2175   }
2176 
2177   if (!rank) {   /* procs[0] */
2178     /* calculate the number of nonzeros on each processor */
2179     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2180     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2181     for (i=0; i<size; i++) {
2182       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2183         procsnz[i] += rowlengths[j];
2184       }
2185     }
2186     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2187 
2188     /* determine max buffer needed and allocate it */
2189     maxnz = 0;
2190     for (i=0; i<size; i++) {
2191       maxnz = PetscMax(maxnz,procsnz[i]);
2192     }
2193     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2194 
2195     /* read in my part of the matrix column indices  */
2196     nz     = procsnz[0];
2197     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2198     mycols = ibuf;
2199     if (size == 1)  nz -= extra_rows;
2200     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2201     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2202 
2203     /* read in the column indices for all the other processors (except the last) and ship them off */
2204     for (i=1; i<size-1; i++) {
2205       nz   = procsnz[i];
2206       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2207       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2208     }
2209     /* read in the stuff for the last proc */
2210     if (size != 1) {
2211       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2212       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2213       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2214       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2215     }
2216     ierr = PetscFree(cols);CHKERRQ(ierr);
2217   } else {  /* procs[i], i>0 */
2218     /* determine buffer space needed for message */
2219     nz = 0;
2220     for (i=0; i<m; i++) {
2221       nz += locrowlens[i];
2222     }
2223     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2224     mycols = ibuf;
2225     /* receive message of column indices*/
2226     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2227     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2228     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2229   }
2230 
2231   /* loop over local rows, determining number of off diagonal entries */
2232   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2233   odlens   = dlens + (rend-rstart);
2234   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2235   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2236   masked1  = mask    + Mbs;
2237   masked2  = masked1 + Mbs;
2238   rowcount = 0; nzcount = 0;
2239   for (i=0; i<mbs; i++) {
2240     dcount  = 0;
2241     odcount = 0;
2242     for (j=0; j<bs; j++) {
2243       kmax = locrowlens[rowcount];
2244       for (k=0; k<kmax; k++) {
2245         tmp = mycols[nzcount++]/bs; /* block col. index */
2246         if (!mask[tmp]) {
2247           mask[tmp] = 1;
2248           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2249           else masked1[dcount++] = tmp; /* entry in diag portion */
2250         }
2251       }
2252       rowcount++;
2253     }
2254 
2255     dlens[i]  = dcount;  /* d_nzz[i] */
2256     odlens[i] = odcount; /* o_nzz[i] */
2257 
2258     /* zero out the mask elements we set */
2259     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2260     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2261   }
2262 
2263   /* create our matrix */
2264   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2265   ierr = MatSetType(A,type);CHKERRQ(ierr);
2266   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2267   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2268 
2269   if (!rank) {
2270     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2271     /* read in my part of the matrix numerical values  */
2272     nz = procsnz[0];
2273     vals = buf;
2274     mycols = ibuf;
2275     if (size == 1)  nz -= extra_rows;
2276     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2277     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2278 
2279     /* insert into matrix */
2280     jj      = rstart*bs;
2281     for (i=0; i<m; i++) {
2282       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2283       mycols += locrowlens[i];
2284       vals   += locrowlens[i];
2285       jj++;
2286     }
2287 
2288     /* read in other processors (except the last one) and ship out */
2289     for (i=1; i<size-1; i++) {
2290       nz   = procsnz[i];
2291       vals = buf;
2292       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2293       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2294     }
2295     /* the last proc */
2296     if (size != 1){
2297       nz   = procsnz[size-1] - extra_rows; /* i == size-1 after the loop above; the extra rows are not on disk */
2298       vals = buf;
2299       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2300       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2301       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2302     }
2303     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2304 
2305   } else {
2306     /* receive numeric values */
2307     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2308 
2309     /* receive message of values*/
2310     vals   = buf;
2311     mycols = ibuf;
2312     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2313     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2314     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2315 
2316     /* insert into matrix */
2317     jj      = rstart*bs;
2318     for (i=0; i<m; i++) {
2319       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2320       mycols += locrowlens[i];
2321       vals   += locrowlens[i];
2322       jj++;
2323     }
2324   }
2325 
2326   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2327   ierr = PetscFree(buf);CHKERRQ(ierr);
2328   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2329   ierr = PetscFree(rowners);CHKERRQ(ierr);
2330   ierr = PetscFree(dlens);CHKERRQ(ierr);
2331   ierr = PetscFree(mask);CHKERRQ(ierr);
2332   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2333   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2334   *newmat = A;
2335   PetscFunctionReturn(0);
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2340 /*@
2341    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2342 
2343    Collective on Mat
2344 
2345    Input Parameters:
2346 +  mat  - the matrix
2347 -  fact - factor
2348 
2349    Level: advanced
2350 
2351   Notes:
2352    Not yet implemented for the SBAIJ format; this routine currently generates an error.
   The factor can instead be set with the command line option: -mat_use_hash_table fact
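
   For example (ex1 here stands for any PETSc program that assembles such a matrix):
.vb
   mpirun -np 2 ex1 -mat_use_hash_table 1.39
.ve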
2353 
2354 .keywords: matrix, hashtable, factor, HT
2355 
2356 .seealso: MatSetOption()
2357 @*/
2358 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2359 {
2360   PetscFunctionBegin;
2361   SETERRQ(1,"Function not yet written for SBAIJ format");
2362   /* PetscFunctionReturn(0); */
2363 }
2364 
2365 #undef __FUNCT__
2366 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2367 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2368 {
2369   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2370   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2371   PetscReal    atmp;
2372   PetscReal    *work,*svalues,*rvalues;
2373   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2374   int          rank,size,*rowners_bs,dest,count,source;
2375   PetscScalar  *va;
2376   MatScalar    *ba;
2377   MPI_Status   stat;
2378 
2379   PetscFunctionBegin;
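  /* Take the row maxima of the local diagonal block first, then fold in the
     off-diagonal block B.  Since only the upper triangle is stored, each entry
     of B also bounds a row owned by a later process; those contributions are
     accumulated in the work array below and shipped to their owners. */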
2380   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2381   ierr = VecGetArrayFast(v,&va);CHKERRQ(ierr);
2382 
2383   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2384   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2385 
2386   bs   = a->bs;
2387   mbs  = a->mbs;
2388   Mbs  = a->Mbs;
2389   ba   = b->a;
2390   bi   = b->i;
2391   bj   = b->j;
2396 
2397   /* find ownerships */
2398   rowners_bs = a->rowners_bs;
2404 
2405   /* each proc creates an array to be distributed */
2406   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2407   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2408 
2409   /* row_max for B */
2410   if (rank != size-1){
2411     for (i=0; i<mbs; i++) {
2412       ncols = bi[1] - bi[0]; bi++;
2413       brow  = bs*i;
2414       for (j=0; j<ncols; j++){
2415         bcol = bs*(*bj);
2416         for (kcol=0; kcol<bs; kcol++){
2417           col = bcol + kcol;                 /* local col index */
2418           col += rowners_bs[rank+1];      /* global col index */
2420           for (krow=0; krow<bs; krow++){
2421             atmp = PetscAbsScalar(*ba); ba++;
2422             row = brow + krow;    /* local row index */
2424             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2425             if (work[col] < atmp) work[col] = atmp;
2426           }
2427         }
2428         bj++;
2429       }
2430     }
2436 
2437     /* send values to its owners */
2438     for (dest=rank+1; dest<size; dest++){
2439       svalues = work + rowners_bs[dest];
2440       count   = rowners_bs[dest+1]-rowners_bs[dest];
2441       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2446     }
2447   }
2448 
2449   /* receive values */
2450   if (rank){
2451     rvalues = work;
2452     count   = rowners_bs[rank+1]-rowners_bs[rank];
2453     for (source=0; source<rank; source++){
2454       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2455       /* process values */
2456       for (i=0; i<count; i++){
2457         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2458       }
2463     }
2464   }
2465 
2466   ierr = VecRestoreArrayFast(v,&va);CHKERRQ(ierr);
2467   ierr = PetscFree(work);CHKERRQ(ierr);
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatRelax_MPISBAIJ"
2473 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2474 {
2475   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2476   int            ierr,mbs=mat->mbs,bs=mat->bs;
2477   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2478   Vec            bb1;
2479 
2480   PetscFunctionBegin;
2481   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2482   if (bs > 1)
2483     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2484 
2485   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2486     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2487       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2488       its--;
2489     }
2490 
2491     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
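      /* Each sweep builds the modified right-hand side
             bb1 = bb - B*x_ghost - (B^T*x contributions received from other processes)
         with a single scatter on the merged work vectors slvec0/slvec1, so that
         the symmetric SOR on the local diagonal block A accounts for the
         off-process part of the matrix. */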
2492     while (its--){
2493 
2494       /* lower triangular part: slvec0b = B^T*xx (negated below together with slvec0a) */
2495       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2496 
2497       /* copy xx into slvec0a */
2498       ierr = VecGetArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr);
2499       ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2500       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2501       ierr = VecRestoreArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr);
2502 
2503       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2504 
2505       /* copy bb into slvec1a */
2506       ierr = VecGetArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr);
2507       ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
2508       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2509       ierr = VecRestoreArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr);
2510 
2511       /* set slvec1b = 0 */
2512       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2513 
2514       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2515       ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2516       ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
2517       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2518 
2519       /* upper triangular part: bb1 = bb1 - B*x */
2520       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2521 
2522       /* local diagonal sweep */
2523       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2524     }
2525     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2526   } else {
2527     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2528   }
2529   PetscFunctionReturn(0);
2530 }
2531 
2532 #undef __FUNCT__
2533 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2534 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2535 {
2536   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2537   int            ierr;
2538   PetscScalar    mone=-1.0;
2539   Vec            lvec1,bb1;
2540 
2541   PetscFunctionBegin;
2542   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2543   if (mat->bs > 1)
2544     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2545 
2546   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2547     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2548       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2549       its--;
2550     }
2551 
2552     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2553     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2554     while (its--){
2555       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2556 
2557       /* lower diagonal part: bb1 = bb - B^T*xx */
2558       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2559       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2560 
2561       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2562       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2563       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2564 
2565       /* upper diagonal part: bb1 = bb1 - B*x */
2566       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2567       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2568 
2569       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2570 
2571       /* diagonal sweep */
2572       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2573     }
2574     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2575     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2576   } else {
2577     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2578   }
2579   PetscFunctionReturn(0);
2580 }
2581 
2582