xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 82327fa8fa4257dfbf0f0c9445cc22b8db1af7d6)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
112         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
113         new_j = (int*)(new_a + bs2*new_nz); \
114         new_i = new_j + new_nz; \
115  \
116         /* copy over old data into new slots */ \
117         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
118         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
120         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
122         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
124         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
125                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
126         /* free up old matrix storage */ \
127         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
128         if (!a->singlemalloc) { \
129           ierr = PetscFree(a->i);CHKERRQ(ierr); \
130           ierr = PetscFree(a->j);CHKERRQ(ierr);\
131         } \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = PETSC_TRUE; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
154 #ifndef MatSetValues_SeqBAIJ_B_Private
155 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
156 { \
157     brow = row/bs;  \
158     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
159     rmax = bimax[brow]; nrow = bilen[brow]; \
160       bcol = col/bs; \
161       ridx = row % bs; cidx = col % bs; \
162       low = 0; high = nrow; \
163       while (high-low > 3) { \
164         t = (low+high)/2; \
165         if (rp[t] > bcol) high = t; \
166         else              low  = t; \
167       } \
168       for (_i=low; _i<high; _i++) { \
169         if (rp[_i] > bcol) break; \
170         if (rp[_i] == bcol) { \
171           bap  = ap +  bs2*_i + bs*cidx + ridx; \
172           if (addv == ADD_VALUES) *bap += value;  \
173           else                    *bap  = value;  \
174           goto b_noinsert; \
175         } \
176       } \
177       if (b->nonew == 1) goto b_noinsert; \
178       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
179       if (nrow >= rmax) { \
180         /* there is no extra room in row, therefore enlarge */ \
181         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
182         MatScalar *new_a; \
183  \
184         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
185  \
186         /* malloc new storage space */ \
187         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
188         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
189         new_j = (int*)(new_a + bs2*new_nz); \
190         new_i = new_j + new_nz; \
191  \
192         /* copy over old data into new slots */ \
193         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
194         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
195         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
196         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
197         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
198         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
201                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
202         /* free up old matrix storage */ \
203         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
204         if (!b->singlemalloc) { \
205           ierr = PetscFree(b->i);CHKERRQ(ierr); \
206           ierr = PetscFree(b->j);CHKERRQ(ierr); \
207         } \
208         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
209         b->singlemalloc = PETSC_TRUE; \
210  \
211         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
212         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
213         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
214         b->maxnz += bs2*CHUNKSIZE; \
215         b->reallocs++; \
216         b->nz++; \
217       } \
218       N = nrow++ - 1;  \
219       /* shift up all the later entries in this row */ \
220       for (ii=N; ii>=_i; ii--) { \
221         rp[ii+1] = rp[ii]; \
222         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223       } \
224       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
225       rp[_i]                      = bcol;  \
226       ap[bs2*_i + bs*cidx + ridx] = value;  \
227       b_noinsert:; \
228     bilen[brow] = nrow; \
229 }
230 #endif
231 
232 #if defined(PETSC_USE_MAT_SINGLE)
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValues_MPISBAIJ"
235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
238   int          ierr,i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
261   int         ierr,i,N = m*n*b->bs2;
262   MatScalar   *vsingle;
263 
264   PetscFunctionBegin;
265   if (N > b->setvalueslen) {
266     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
267     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
268     b->setvalueslen  = N;
269   }
270   vsingle = b->setvaluescopy;
271   for (i=0; i<N; i++) {
272     vsingle[i] = v[i];
273   }
274   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
281 {
282   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
283   int         ierr,i,N = m*n;
284   MatScalar   *vsingle;
285 
286   PetscFunctionBegin;
287   SETERRQ(1,"Function not yet written for SBAIJ format");
288   /* PetscFunctionReturn(0); */
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
294 {
295   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
296   int         ierr,i,N = m*n*b->bs2;
297   MatScalar   *vsingle;
298 
299   PetscFunctionBegin;
300   SETERRQ(1,"Function not yet written for SBAIJ format");
301   /* PetscFunctionReturn(0); */
302 }
303 #endif
304 
305 /* Only add/insert a(i,j) with i<=j (blocks).
306    Any a(i,j) with i>j input by user is ingored.
307 */
308 #undef __FUNCT__
309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
311 {
312   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
313   MatScalar    value;
314   PetscTruth   roworiented = baij->roworiented;
315   int          ierr,i,j,row,col;
316   int          rstart_orig=baij->rstart_bs;
317   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
318   int          cend_orig=baij->cend_bs,bs=baij->bs;
319 
320   /* Some Variables required in the macro */
321   Mat          A = baij->A;
322   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
323   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
324   MatScalar    *aa=a->a;
325 
326   Mat          B = baij->B;
327   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
328   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
329   MatScalar    *ba=b->a;
330 
331   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
332   int          low,high,t,ridx,cidx,bs2=a->bs2;
333   MatScalar    *ap,*bap;
334 
335   /* for stash */
336   int          n_loc, *in_loc=0;
337   MatScalar    *v_loc=0;
338 
339   PetscFunctionBegin;
340 
341   if(!baij->donotstash){
342     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
343     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
344   }
345 
346   for (i=0; i<m; i++) {
347     if (im[i] < 0) continue;
348 #if defined(PETSC_USE_BOPT_g)
349     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
350 #endif
351     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
352       row = im[i] - rstart_orig;              /* local row index */
353       for (j=0; j<n; j++) {
354         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
355         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
356           col = in[j] - cstart_orig;          /* local col index */
357           brow = row/bs; bcol = col/bs;
358           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
359           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
360           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
361           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
362         } else if (in[j] < 0) continue;
363 #if defined(PETSC_USE_BOPT_g)
364         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
365 #endif
366         else {  /* off-diag entry (B) */
367           if (mat->was_assembled) {
368             if (!baij->colmap) {
369               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
370             }
371 #if defined (PETSC_USE_CTABLE)
372             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
373             col  = col - 1;
374 #else
375             col = baij->colmap[in[j]/bs] - 1;
376 #endif
377             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
378               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
379               col =  in[j];
380               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
381               B = baij->B;
382               b = (Mat_SeqBAIJ*)(B)->data;
383               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
384               ba=b->a;
385             } else col += in[j]%bs;
386           } else col = in[j];
387           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
388           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
389           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
390         }
391       }
392     } else {  /* off processor entry */
393       if (!baij->donotstash) {
394         n_loc = 0;
395         for (j=0; j<n; j++){
396           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
397           in_loc[n_loc] = in[j];
398           if (roworiented) {
399             v_loc[n_loc] = v[i*n+j];
400           } else {
401             v_loc[n_loc] = v[j*m+i];
402           }
403           n_loc++;
404         }
405         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
406       }
407     }
408   }
409 
410   if(!baij->donotstash){
411     ierr = PetscFree(in_loc);CHKERRQ(ierr);
412     ierr = PetscFree(v_loc);CHKERRQ(ierr);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
420 {
421   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
422   const MatScalar *value;
423   MatScalar       *barray=baij->barray;
424   PetscTruth      roworiented = baij->roworiented;
425   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
426   int             rend=baij->rend,cstart=baij->cstart,stepval;
427   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
428 
429   PetscFunctionBegin;
430   if(!barray) {
431     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
432     baij->barray = barray;
433   }
434 
435   if (roworiented) {
436     stepval = (n-1)*bs;
437   } else {
438     stepval = (m-1)*bs;
439   }
440   for (i=0; i<m; i++) {
441     if (im[i] < 0) continue;
442 #if defined(PETSC_USE_BOPT_g)
443     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
444 #endif
445     if (im[i] >= rstart && im[i] < rend) {
446       row = im[i] - rstart;
447       for (j=0; j<n; j++) {
448         /* If NumCol = 1 then a copy is not required */
449         if ((roworiented) && (n == 1)) {
450           barray = (MatScalar*) v + i*bs2;
451         } else if((!roworiented) && (m == 1)) {
452           barray = (MatScalar*) v + j*bs2;
453         } else { /* Here a copy is required */
454           if (roworiented) {
455             value = v + i*(stepval+bs)*bs + j*bs;
456           } else {
457             value = v + j*(stepval+bs)*bs + i*bs;
458           }
459           for (ii=0; ii<bs; ii++,value+=stepval) {
460             for (jj=0; jj<bs; jj++) {
461               *barray++  = *value++;
462             }
463           }
464           barray -=bs2;
465         }
466 
467         if (in[j] >= cstart && in[j] < cend){
468           col  = in[j] - cstart;
469           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
470         }
471         else if (in[j] < 0) continue;
472 #if defined(PETSC_USE_BOPT_g)
473         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
474 #endif
475         else {
476           if (mat->was_assembled) {
477             if (!baij->colmap) {
478               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
479             }
480 
481 #if defined(PETSC_USE_BOPT_g)
482 #if defined (PETSC_USE_CTABLE)
483             { int data;
484               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
485               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
486             }
487 #else
488             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
489 #endif
490 #endif
491 #if defined (PETSC_USE_CTABLE)
492 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
493             col  = (col - 1)/bs;
494 #else
495             col = (baij->colmap[in[j]] - 1)/bs;
496 #endif
497             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
498               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
499               col =  in[j];
500             }
501           }
502           else col = in[j];
503           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
504         }
505       }
506     } else {
507       if (!baij->donotstash) {
508         if (roworiented) {
509           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
510         } else {
511           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
512         }
513       }
514     }
515   }
516   PetscFunctionReturn(0);
517 }
518 
519 #define HASH_KEY 0.6180339887
520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
523 #undef __FUNCT__
524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
526 {
527   PetscFunctionBegin;
528   SETERRQ(1,"Function not yet written for SBAIJ format");
529   /* PetscFunctionReturn(0); */
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
535 {
536   PetscFunctionBegin;
537   SETERRQ(1,"Function not yet written for SBAIJ format");
538   /* PetscFunctionReturn(0); */
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatGetValues_MPISBAIJ"
543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
544 {
545   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
546   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
547   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
552     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
553     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
554       row = idxm[i] - bsrstart;
555       for (j=0; j<n; j++) {
556         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
557         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
558         if (idxn[j] >= bscstart && idxn[j] < bscend){
559           col = idxn[j] - bscstart;
560           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
561         } else {
562           if (!baij->colmap) {
563             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
564           }
565 #if defined (PETSC_USE_CTABLE)
566           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
567           data --;
568 #else
569           data = baij->colmap[idxn[j]/bs]-1;
570 #endif
571           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
572           else {
573             col  = data + idxn[j]%bs;
574             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575           }
576         }
577       }
578     } else {
579       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
580     }
581   }
582  PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatNorm_MPISBAIJ"
587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
588 {
589   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
590   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
591   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
592   int        ierr;
593   PetscReal  sum[2],*lnorm2;
594 
595   PetscFunctionBegin;
596   if (baij->size == 1) {
597     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
598   } else {
599     if (type == NORM_FROBENIUS) {
600       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
601       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
602       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
603       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
605       /*
606       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
607       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
608       */
609       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
610       /*
611       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
612       PetscSynchronizedFlush(mat->comm); */
613 
614       *norm = sqrt(sum[0] + 2*sum[1]);
615       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 /*
624   Creates the hash table, and sets the table
625   This table is created only once.
626   If new entried need to be added to the matrix
627   then the hash table has to be destroyed and
628   recreated.
629 */
630 #undef __FUNCT__
631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
633 {
634   PetscFunctionBegin;
635   SETERRQ(1,"Function not yet written for SBAIJ format");
636   /* PetscFunctionReturn(0); */
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
642 {
643   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
644   int         ierr,nstash,reallocs;
645   InsertMode  addv;
646 
647   PetscFunctionBegin;
648   if (baij->donotstash) {
649     PetscFunctionReturn(0);
650   }
651 
652   /* make sure all processors are either in INSERTMODE or ADDMODE */
653   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
654   if (addv == (ADD_VALUES|INSERT_VALUES)) {
655     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
656   }
657   mat->insertmode = addv; /* in case this processor had no cache */
658 
659   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
663   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
664   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
673   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
674   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
675   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
676   int         *row,*col,other_disassembled;
677   PetscTruth  r1,r2,r3;
678   MatScalar   *val;
679   InsertMode  addv = mat->insertmode;
680 
681   PetscFunctionBegin;
682 
683   if (!baij->donotstash) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       /*
687       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
688       PetscSynchronizedFlush(mat->comm);
689       */
690       if (!flg) break;
691 
692       for (i=0; i<n;) {
693         /* Now identify the consecutive vals belonging to the same row */
694         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
695         if (j < n) ncols = j-i;
696         else       ncols = n-i;
697         /* Now assemble all these values with a single function call */
698         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
699         i = j;
700       }
701     }
702     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
703     /* Now process the block-stash. Since the values are stashed column-oriented,
704        set the roworiented flag to column oriented, and after MatSetValues()
705        restore the original flags */
706     r1 = baij->roworiented;
707     r2 = a->roworiented;
708     r3 = b->roworiented;
709     baij->roworiented = PETSC_FALSE;
710     a->roworiented    = PETSC_FALSE;
711     b->roworiented    = PETSC_FALSE;
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n;) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
719         if (j < n) ncols = j-i;
720         else       ncols = n-i;
721         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
722         i = j;
723       }
724     }
725     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
726     baij->roworiented = r1;
727     a->roworiented    = r2;
728     b->roworiented    = r3;
729   }
730 
731   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselfs, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746 
747   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
748     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
749   }
750   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
752 
753 #if defined(PETSC_USE_BOPT_g)
754   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
755     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
756     baij->ht_total_ct  = 0;
757     baij->ht_insert_ct = 0;
758   }
759 #endif
760   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
761     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
762     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
763     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
764   }
765 
766   if (baij->rowvalues) {
767     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
768     baij->rowvalues = 0;
769   }
770 
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
777 {
778   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
779   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
780   PetscTruth        isascii,isdraw;
781   PetscViewer       sviewer;
782   PetscViewerFormat format;
783 
784   PetscFunctionBegin;
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
786   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
787   if (isascii) {
788     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
789     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
790       MatInfo info;
791       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
793       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
794               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
795               baij->bs,(int)info.memory);CHKERRQ(ierr);
796       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
797       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
798       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
799       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
800       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
801       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_INFO) {
804       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
805       PetscFunctionReturn(0);
806     }
807   }
808 
809   if (isdraw) {
810     PetscDraw       draw;
811     PetscTruth isnull;
812     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
813     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
814   }
815 
816   if (size == 1) {
817     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
818     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
819   } else {
820     /* assemble the entire matrix onto first processor. */
821     Mat         A;
822     Mat_SeqSBAIJ *Aloc;
823     Mat_SeqBAIJ *Bloc;
824     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
825     MatScalar   *a;
826 
827     if (!rank) {
828       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
829     } else {
830       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
831     }
832     PetscLogObjectParent(mat,A);
833 
834     /* copy over the A part */
835     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
836     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
837     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
838 
839     for (i=0; i<mbs; i++) {
840       rvals[0] = bs*(baij->rstart + i);
841       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
842       for (j=ai[i]; j<ai[i+1]; j++) {
843         col = (baij->cstart+aj[j])*bs;
844         for (k=0; k<bs; k++) {
845           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
846           col++; a += bs;
847         }
848       }
849     }
850     /* copy over the B part */
851     Bloc = (Mat_SeqBAIJ*)baij->B->data;
852     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
853     for (i=0; i<mbs; i++) {
854       rvals[0] = bs*(baij->rstart + i);
855       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
856       for (j=ai[i]; j<ai[i+1]; j++) {
857         col = baij->garray[aj[j]]*bs;
858         for (k=0; k<bs; k++) {
859           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
860           col++; a += bs;
861         }
862       }
863     }
864     ierr = PetscFree(rvals);CHKERRQ(ierr);
865     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
867     /*
868        Everyone has to call to draw the matrix since the graphics waits are
869        synchronized across all processors that share the PetscDraw object
870     */
871     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
872     if (!rank) {
873       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
874       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
875     }
876     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
877     ierr = MatDestroy(A);CHKERRQ(ierr);
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "MatView_MPISBAIJ"
884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
885 {
886   int        ierr;
887   PetscTruth isascii,isdraw,issocket,isbinary;
888 
889   PetscFunctionBegin;
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
894   if (isascii || isdraw || issocket || isbinary) {
895     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
896   } else {
897     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
898   }
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatDestroy_MPISBAIJ"
904 int MatDestroy_MPISBAIJ(Mat mat)
905 {
906   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
907   int         ierr;
908 
909   PetscFunctionBegin;
910 #if defined(PETSC_USE_LOG)
911   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
912 #endif
913   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
914   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
915   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
916   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
917   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
918 #if defined (PETSC_USE_CTABLE)
919   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
920 #else
921   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
922 #endif
923   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
924   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
925   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
926   if (baij->slvec0) {
927     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
928     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
929   }
930   if (baij->slvec1) {
931     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
932     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
933     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
934   }
935   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
936   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
937   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
938   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
939 #if defined(PETSC_USE_MAT_SINGLE)
940   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
941 #endif
942   ierr = PetscFree(baij);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMult_MPISBAIJ"
948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
949 {
950   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
951   int         ierr,nt,mbs=a->mbs,bs=a->bs;
952   PetscScalar *x,*from,zero=0.0;
953 
954   PetscFunctionBegin;
955   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
956   if (nt != A->n) {
957     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
958   }
959   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
960   if (nt != A->m) {
961     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
962   }
963 
964   /* diagonal part */
965   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
966   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
967 
968   /* subdiagonal part */
969   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
970 
971   /* copy x into the vec slvec0 */
972   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
973   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
974   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
975   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
976 
977   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
978   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
979   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
980 
981   /* supperdiagonal part */
982   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
983 
984   PetscFunctionReturn(0);
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
989 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
990 {
991   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
992   int         ierr,nt;
993 
994   PetscFunctionBegin;
995   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
996   if (nt != A->n) {
997     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
998   }
999   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1000   if (nt != A->m) {
1001     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1002   }
1003 
1004   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1005   /* do diagonal part */
1006   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1007   /* do supperdiagonal part */
1008   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1009   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1010   /* do subdiagonal part */
1011   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1012   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1013   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1014 
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1020 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1021 {
1022   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1023   int          ierr,mbs=a->mbs,bs=a->bs;
1024   PetscScalar  *x,*from,zero=0.0;
1025 
1026   PetscFunctionBegin;
1027   /*
1028   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1029   PetscSynchronizedFlush(A->comm);
1030   */
1031   /* diagonal part */
1032   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1033   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1034 
1035   /* subdiagonal part */
1036   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1037 
1038   /* copy x into the vec slvec0 */
1039   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1040   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1041   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1042   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1043 
1044   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1045   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1046   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1047 
1048   /* supperdiagonal part */
1049   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1050 
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1056 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1057 {
1058   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1059   int        ierr;
1060 
1061   PetscFunctionBegin;
1062   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1063   /* do diagonal part */
1064   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1065   /* do supperdiagonal part */
1066   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1067   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1068 
1069   /* do subdiagonal part */
1070   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1071   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1072   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1073 
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1079 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   int ierr;
1082 
1083   PetscFunctionBegin;
1084   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1090 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1091 {
1092   int ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /*
1100   This only works correctly for square matrices where the subblock A->A is the
1101    diagonal block
1102 */
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1105 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1106 {
1107   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1108   int         ierr;
1109 
1110   PetscFunctionBegin;
1111   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1112   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatScale_MPISBAIJ"
1118 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1119 {
1120   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1121   int         ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1125   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1131 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1132 {
1133   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1134   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1135   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1136   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1137   int            *cmap,*idx_p,cstart = mat->cstart;
1138 
1139   PetscFunctionBegin;
1140   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1141   mat->getrowactive = PETSC_TRUE;
1142 
1143   if (!mat->rowvalues && (idx || v)) {
1144     /*
1145         allocate enough space to hold information from the longest row.
1146     */
1147     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1148     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1149     int     max = 1,mbs = mat->mbs,tmp;
1150     for (i=0; i<mbs; i++) {
1151       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1152       if (max < tmp) { max = tmp; }
1153     }
1154     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1155     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1156   }
1157 
1158   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1159   lrow = row - brstart;  /* local row index */
1160 
1161   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1162   if (!v)   {pvA = 0; pvB = 0;}
1163   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1164   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1165   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1166   nztot = nzA + nzB;
1167 
1168   cmap  = mat->garray;
1169   if (v  || idx) {
1170     if (nztot) {
1171       /* Sort by increasing column numbers, assuming A and B already sorted */
1172       int imark = -1;
1173       if (v) {
1174         *v = v_p = mat->rowvalues;
1175         for (i=0; i<nzB; i++) {
1176           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1177           else break;
1178         }
1179         imark = i;
1180         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1181         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1182       }
1183       if (idx) {
1184         *idx = idx_p = mat->rowindices;
1185         if (imark > -1) {
1186           for (i=0; i<imark; i++) {
1187             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1188           }
1189         } else {
1190           for (i=0; i<nzB; i++) {
1191             if (cmap[cworkB[i]/bs] < cstart)
1192               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1193             else break;
1194           }
1195           imark = i;
1196         }
1197         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1198         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1199       }
1200     } else {
1201       if (idx) *idx = 0;
1202       if (v)   *v   = 0;
1203     }
1204   }
1205   *nz = nztot;
1206   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1207   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1213 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1214 {
1215   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1216 
1217   PetscFunctionBegin;
1218   if (baij->getrowactive == PETSC_FALSE) {
1219     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1220   }
1221   baij->getrowactive = PETSC_FALSE;
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1227 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1228 {
1229   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1230 
1231   PetscFunctionBegin;
1232   *bs = baij->bs;
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1238 int MatZeroEntries_MPISBAIJ(Mat A)
1239 {
1240   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1241   int         ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1245   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1251 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1252 {
1253   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1254   Mat         A = a->A,B = a->B;
1255   int         ierr;
1256   PetscReal   isend[5],irecv[5];
1257 
1258   PetscFunctionBegin;
1259   info->block_size     = (PetscReal)a->bs;
1260   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1261   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1262   isend[3] = info->memory;  isend[4] = info->mallocs;
1263   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1264   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1265   isend[3] += info->memory;  isend[4] += info->mallocs;
1266   if (flag == MAT_LOCAL) {
1267     info->nz_used      = isend[0];
1268     info->nz_allocated = isend[1];
1269     info->nz_unneeded  = isend[2];
1270     info->memory       = isend[3];
1271     info->mallocs      = isend[4];
1272   } else if (flag == MAT_GLOBAL_MAX) {
1273     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1274     info->nz_used      = irecv[0];
1275     info->nz_allocated = irecv[1];
1276     info->nz_unneeded  = irecv[2];
1277     info->memory       = irecv[3];
1278     info->mallocs      = irecv[4];
1279   } else if (flag == MAT_GLOBAL_SUM) {
1280     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1281     info->nz_used      = irecv[0];
1282     info->nz_allocated = irecv[1];
1283     info->nz_unneeded  = irecv[2];
1284     info->memory       = irecv[3];
1285     info->mallocs      = irecv[4];
1286   } else {
1287     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1288   }
1289   info->rows_global       = (PetscReal)A->M;
1290   info->columns_global    = (PetscReal)A->N;
1291   info->rows_local        = (PetscReal)A->m;
1292   info->columns_local     = (PetscReal)A->N;
1293   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1294   info->fill_ratio_needed = 0;
1295   info->factor_mallocs    = 0;
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1301 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1302 {
1303   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1304   int         ierr;
1305 
1306   PetscFunctionBegin;
1307   switch (op) {
1308   case MAT_NO_NEW_NONZERO_LOCATIONS:
1309   case MAT_YES_NEW_NONZERO_LOCATIONS:
1310   case MAT_COLUMNS_UNSORTED:
1311   case MAT_COLUMNS_SORTED:
1312   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1313   case MAT_KEEP_ZEROED_ROWS:
1314   case MAT_NEW_NONZERO_LOCATION_ERR:
1315     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1316     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1317     break;
1318   case MAT_ROW_ORIENTED:
1319     a->roworiented = PETSC_TRUE;
1320     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1321     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1322     break;
1323   case MAT_ROWS_SORTED:
1324   case MAT_ROWS_UNSORTED:
1325   case MAT_YES_NEW_DIAGONALS:
1326     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1327     break;
1328   case MAT_COLUMN_ORIENTED:
1329     a->roworiented = PETSC_FALSE;
1330     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1331     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1332     break;
1333   case MAT_IGNORE_OFF_PROC_ENTRIES:
1334     a->donotstash = PETSC_TRUE;
1335     break;
1336   case MAT_NO_NEW_DIAGONALS:
1337     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1338   case MAT_USE_HASH_TABLE:
1339     a->ht_flag = PETSC_TRUE;
1340     break;
1341   case MAT_NOT_SYMMETRIC:
1342   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1343   case MAT_HERMITIAN:
1344     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1345   case MAT_SYMMETRIC:
1346   case MAT_STRUCTURALLY_SYMMETRIC:
1347   case MAT_NOT_HERMITIAN:
1348   case MAT_SYMMETRY_ETERNAL:
1349   case MAT_NOT_SYMMETRY_ETERNAL:
1350     break;
1351   default:
1352     SETERRQ(PETSC_ERR_SUP,"unknown option");
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1359 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1360 {
1361   int ierr;
1362   PetscFunctionBegin;
1363   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1369 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1370 {
1371   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1372   Mat         a = baij->A,b = baij->B;
1373   int         ierr,s1,s2,s3;
1374 
1375   PetscFunctionBegin;
1376   if (ll != rr) {
1377     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1378   }
1379   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1380   if (rr) {
1381     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1382     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1383     /* Overlap communication with computation. */
1384     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1385     /*} if (ll) { */
1386     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1387     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1388     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1389     /* } */
1390   /* scale  the diagonal block */
1391   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1392 
1393   /* if (rr) { */
1394     /* Do a scatter end and then right scale the off-diagonal block */
1395     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1396     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1397   }
1398 
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1404 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1405 {
1406   PetscFunctionBegin;
1407   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1412 int MatPrintHelp_MPISBAIJ(Mat A)
1413 {
1414   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1415   MPI_Comm    comm = A->comm;
1416   static int  called = 0;
1417   int         ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!a->rank) {
1421     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1422   }
1423   if (called) {PetscFunctionReturn(0);} else called = 1;
1424   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1425   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1431 int MatSetUnfactored_MPISBAIJ(Mat A)
1432 {
1433   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1434   int         ierr;
1435 
1436   PetscFunctionBegin;
1437   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatEqual_MPISBAIJ"
1445 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1446 {
1447   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1448   Mat         a,b,c,d;
1449   PetscTruth  flg;
1450   int         ierr;
1451 
1452   PetscFunctionBegin;
1453   a = matA->A; b = matA->B;
1454   c = matB->A; d = matB->B;
1455 
1456   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1457   if (flg == PETSC_TRUE) {
1458     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1459   }
1460   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1466 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1467 {
1468   int        ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1477 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1478 {
1479   int        i,ierr;
1480   PetscTruth flg;
1481 
1482   for (i=0; i<n; i++) {
1483     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1484     if (!flg) {
1485       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1486     }
1487   }
1488   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 
1493 /* -------------------------------------------------------------------*/
1494 static struct _MatOps MatOps_Values = {
1495        MatSetValues_MPISBAIJ,
1496        MatGetRow_MPISBAIJ,
1497        MatRestoreRow_MPISBAIJ,
1498        MatMult_MPISBAIJ,
1499 /* 4*/ MatMultAdd_MPISBAIJ,
1500        MatMultTranspose_MPISBAIJ,
1501        MatMultTransposeAdd_MPISBAIJ,
1502        0,
1503        0,
1504        0,
1505 /*10*/ 0,
1506        0,
1507        0,
1508        MatRelax_MPISBAIJ,
1509        MatTranspose_MPISBAIJ,
1510 /*15*/ MatGetInfo_MPISBAIJ,
1511        MatEqual_MPISBAIJ,
1512        MatGetDiagonal_MPISBAIJ,
1513        MatDiagonalScale_MPISBAIJ,
1514        MatNorm_MPISBAIJ,
1515 /*20*/ MatAssemblyBegin_MPISBAIJ,
1516        MatAssemblyEnd_MPISBAIJ,
1517        0,
1518        MatSetOption_MPISBAIJ,
1519        MatZeroEntries_MPISBAIJ,
1520 /*25*/ MatZeroRows_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*35*/ MatDuplicate_MPISBAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*40*/ 0,
1536        MatGetSubMatrices_MPISBAIJ,
1537        0,
1538        MatGetValues_MPISBAIJ,
1539        0,
1540 /*45*/ MatPrintHelp_MPISBAIJ,
1541        MatScale_MPISBAIJ,
1542        0,
1543        0,
1544        0,
1545 /*50*/ MatGetBlockSize_MPISBAIJ,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*55*/ 0,
1551        0,
1552        MatSetUnfactored_MPISBAIJ,
1553        0,
1554        MatSetValuesBlocked_MPISBAIJ,
1555 /*60*/ 0,
1556        0,
1557        0,
1558        MatGetPetscMaps_Petsc,
1559        0,
1560 /*65*/ 0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*70*/ MatGetRowMax_MPISBAIJ,
1566        0,
1567        0,
1568        0,
1569        0,
1570 /*75*/ 0,
1571        0,
1572        0,
1573        0,
1574        0,
1575 /*80*/ 0,
1576        0,
1577        0,
1578        0,
1579 /*85*/ MatLoad_MPISBAIJ
1580 };
1581 
1582 
1583 EXTERN_C_BEGIN
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1586 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1587 {
1588   PetscFunctionBegin;
1589   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1590   *iscopy = PETSC_FALSE;
1591   PetscFunctionReturn(0);
1592 }
1593 EXTERN_C_END
1594 
1595 EXTERN_C_BEGIN
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1598 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1599 {
1600   Mat_MPISBAIJ *b;
1601   int          ierr,i,mbs,Mbs;
1602 
1603   PetscFunctionBegin;
1604   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1605 
1606   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1607   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1608   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1609   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1610   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1611   if (d_nnz) {
1612     for (i=0; i<B->m/bs; i++) {
1613       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1614     }
1615   }
1616   if (o_nnz) {
1617     for (i=0; i<B->m/bs; i++) {
1618       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1619     }
1620   }
1621   B->preallocated = PETSC_TRUE;
1622   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1623   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1624   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1625   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1626 
1627   b   = (Mat_MPISBAIJ*)B->data;
1628   mbs = B->m/bs;
1629   Mbs = B->M/bs;
1630   if (mbs*bs != B->m) {
1631     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1632   }
1633 
1634   b->bs  = bs;
1635   b->bs2 = bs*bs;
1636   b->mbs = mbs;
1637   b->nbs = mbs;
1638   b->Mbs = Mbs;
1639   b->Nbs = Mbs;
1640 
1641   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1642   b->rowners[0]    = 0;
1643   for (i=2; i<=b->size; i++) {
1644     b->rowners[i] += b->rowners[i-1];
1645   }
1646   b->rstart    = b->rowners[b->rank];
1647   b->rend      = b->rowners[b->rank+1];
1648   b->cstart    = b->rstart;
1649   b->cend      = b->rend;
1650   for (i=0; i<=b->size; i++) {
1651     b->rowners_bs[i] = b->rowners[i]*bs;
1652   }
1653   b->rstart_bs = b-> rstart*bs;
1654   b->rend_bs   = b->rend*bs;
1655 
1656   b->cstart_bs = b->cstart*bs;
1657   b->cend_bs   = b->cend*bs;
1658 
1659 
1660   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1661   PetscLogObjectParent(B,b->A);
1662   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1663   PetscLogObjectParent(B,b->B);
1664 
1665   /* build cache for off array entries formed */
1666   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1667 
1668   PetscFunctionReturn(0);
1669 }
1670 EXTERN_C_END
1671 
1672 /*MC
1673    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1674    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1675 
1676    Options Database Keys:
1677 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1678 
1679   Level: beginner
1680 
1681 .seealso: MatCreateMPISBAIJ
1682 M*/
1683 
1684 EXTERN_C_BEGIN
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatCreate_MPISBAIJ"
1687 int MatCreate_MPISBAIJ(Mat B)
1688 {
1689   Mat_MPISBAIJ *b;
1690   int          ierr;
1691   PetscTruth   flg;
1692 
1693   PetscFunctionBegin;
1694 
1695   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1696   B->data = (void*)b;
1697   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1698   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1699 
1700   B->ops->destroy    = MatDestroy_MPISBAIJ;
1701   B->ops->view       = MatView_MPISBAIJ;
1702   B->mapping    = 0;
1703   B->factor     = 0;
1704   B->assembled  = PETSC_FALSE;
1705 
1706   B->insertmode = NOT_SET_VALUES;
1707   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1708   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1709 
1710   /* build local table of row and column ownerships */
1711   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1712   b->cowners    = b->rowners + b->size + 2;
1713   b->rowners_bs = b->cowners + b->size + 2;
1714   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1715 
1716   /* build cache for off array entries formed */
1717   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1718   b->donotstash  = PETSC_FALSE;
1719   b->colmap      = PETSC_NULL;
1720   b->garray      = PETSC_NULL;
1721   b->roworiented = PETSC_TRUE;
1722 
1723 #if defined(PETSC_USE_MAT_SINGLE)
1724   /* stuff for MatSetValues_XXX in single precision */
1725   b->setvalueslen     = 0;
1726   b->setvaluescopy    = PETSC_NULL;
1727 #endif
1728 
1729   /* stuff used in block assembly */
1730   b->barray       = 0;
1731 
1732   /* stuff used for matrix vector multiply */
1733   b->lvec         = 0;
1734   b->Mvctx        = 0;
1735   b->slvec0       = 0;
1736   b->slvec0b      = 0;
1737   b->slvec1       = 0;
1738   b->slvec1a      = 0;
1739   b->slvec1b      = 0;
1740   b->sMvctx       = 0;
1741 
1742   /* stuff for MatGetRow() */
1743   b->rowindices   = 0;
1744   b->rowvalues    = 0;
1745   b->getrowactive = PETSC_FALSE;
1746 
1747   /* hash table stuff */
1748   b->ht           = 0;
1749   b->hd           = 0;
1750   b->ht_size      = 0;
1751   b->ht_flag      = PETSC_FALSE;
1752   b->ht_fact      = 0;
1753   b->ht_total_ct  = 0;
1754   b->ht_insert_ct = 0;
1755 
1756   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1757   if (flg) {
1758     PetscReal fact = 1.39;
1759     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1760     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1761     if (fact <= 1.0) fact = 1.39;
1762     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1763     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1764   }
1765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1766                                      "MatStoreValues_MPISBAIJ",
1767                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1769                                      "MatRetrieveValues_MPISBAIJ",
1770                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1772                                      "MatGetDiagonalBlock_MPISBAIJ",
1773                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1775                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1776                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 EXTERN_C_END
1780 
1781 /*MC
1782    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1783 
1784    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1785    and MATMPISBAIJ otherwise.
1786 
1787    Options Database Keys:
1788 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1789 
1790   Level: beginner
1791 
1792 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1793 M*/
1794 
1795 EXTERN_C_BEGIN
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatCreate_SBAIJ"
1798 int MatCreate_SBAIJ(Mat A) {
1799   int ierr,size;
1800 
1801   PetscFunctionBegin;
1802   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1803   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1804   if (size == 1) {
1805     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1808   }
1809   PetscFunctionReturn(0);
1810 }
1811 EXTERN_C_END
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1815 /*@C
1816    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1817    the user should preallocate the matrix storage by setting the parameters
1818    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1819    performance can be increased by more than a factor of 50.
1820 
1821    Collective on Mat
1822 
1823    Input Parameters:
1824 +  A - the matrix
1825 .  bs   - size of blockk
1826 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1827            submatrix  (same for all local rows)
1828 .  d_nnz - array containing the number of block nonzeros in the various block rows
1829            in the upper triangular and diagonal part of the in diagonal portion of the local
1830            (possibly different for each block row) or PETSC_NULL.  You must leave room
1831            for the diagonal entry even if it is zero.
1832 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1833            submatrix (same for all local rows).
1834 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1835            off-diagonal portion of the local submatrix (possibly different for
1836            each block row) or PETSC_NULL.
1837 
1838 
1839    Options Database Keys:
1840 .   -mat_no_unroll - uses code that does not unroll the loops in the
1841                      block calculations (much slower)
1842 .   -mat_block_size - size of the blocks to use
1843 
1844    Notes:
1845 
1846    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1847    than it must be used on all processors that share the object for that argument.
1848 
1849    Storage Information:
1850    For a square global matrix we define each processor's diagonal portion
1851    to be its local rows and the corresponding columns (a square submatrix);
1852    each processor's off-diagonal portion encompasses the remainder of the
1853    local matrix (a rectangular submatrix).
1854 
1855    The user can specify preallocated storage for the diagonal part of
1856    the local submatrix with either d_nz or d_nnz (not both).  Set
1857    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1858    memory allocation.  Likewise, specify preallocated storage for the
1859    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1860 
1861    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1862    the figure below we depict these three local rows and all columns (0-11).
1863 
1864 .vb
1865            0 1 2 3 4 5 6 7 8 9 10 11
1866           -------------------
1867    row 3  |  o o o d d d o o o o o o
1868    row 4  |  o o o d d d o o o o o o
1869    row 5  |  o o o d d d o o o o o o
1870           -------------------
1871 .ve
1872 
1873    Thus, any entries in the d locations are stored in the d (diagonal)
1874    submatrix, and any entries in the o locations are stored in the
1875    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1876    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1877 
1878    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1879    plus the diagonal part of the d matrix,
1880    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1881    In general, for PDE problems in which most nonzeros are near the diagonal,
1882    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1883    or you will get TERRIBLE performance; see the users' manual chapter on
1884    matrices.
1885 
1886    Level: intermediate
1887 
1888 .keywords: matrix, block, aij, compressed row, sparse, parallel
1889 
1890 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1891 @*/
1892 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1893 {
1894   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1895 
1896   PetscFunctionBegin;
1897   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1898   if (f) {
1899     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatCreateMPISBAIJ"
1906 /*@C
1907    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1908    (block compressed row).  For good matrix assembly performance
1909    the user should preallocate the matrix storage by setting the parameters
1910    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1911    performance can be increased by more than a factor of 50.
1912 
1913    Collective on MPI_Comm
1914 
1915    Input Parameters:
1916 +  comm - MPI communicator
1917 .  bs   - size of blockk
1918 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1919            This value should be the same as the local size used in creating the
1920            y vector for the matrix-vector product y = Ax.
1921 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1922            This value should be the same as the local size used in creating the
1923            x vector for the matrix-vector product y = Ax.
1924 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1925 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1926 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1927            submatrix  (same for all local rows)
1928 .  d_nnz - array containing the number of block nonzeros in the various block rows
1929            in the upper triangular portion of the in diagonal portion of the local
1930            (possibly different for each block block row) or PETSC_NULL.
1931            You must leave room for the diagonal entry even if it is zero.
1932 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1933            submatrix (same for all local rows).
1934 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1935            off-diagonal portion of the local submatrix (possibly different for
1936            each block row) or PETSC_NULL.
1937 
1938    Output Parameter:
1939 .  A - the matrix
1940 
1941    Options Database Keys:
1942 .   -mat_no_unroll - uses code that does not unroll the loops in the
1943                      block calculations (much slower)
1944 .   -mat_block_size - size of the blocks to use
1945 .   -mat_mpi - use the parallel matrix data structures even on one processor
1946                (defaults to using SeqBAIJ format on one processor)
1947 
1948    Notes:
1949    The user MUST specify either the local or global matrix dimensions
1950    (possibly both).
1951 
1952    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1953    than it must be used on all processors that share the object for that argument.
1954 
1955    Storage Information:
1956    For a square global matrix we define each processor's diagonal portion
1957    to be its local rows and the corresponding columns (a square submatrix);
1958    each processor's off-diagonal portion encompasses the remainder of the
1959    local matrix (a rectangular submatrix).
1960 
1961    The user can specify preallocated storage for the diagonal part of
1962    the local submatrix with either d_nz or d_nnz (not both).  Set
1963    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1964    memory allocation.  Likewise, specify preallocated storage for the
1965    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1966 
1967    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1968    the figure below we depict these three local rows and all columns (0-11).
1969 
1970 .vb
1971            0 1 2 3 4 5 6 7 8 9 10 11
1972           -------------------
1973    row 3  |  o o o d d d o o o o o o
1974    row 4  |  o o o d d d o o o o o o
1975    row 5  |  o o o d d d o o o o o o
1976           -------------------
1977 .ve
1978 
1979    Thus, any entries in the d locations are stored in the d (diagonal)
1980    submatrix, and any entries in the o locations are stored in the
1981    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1982    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1983 
1984    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1985    plus the diagonal part of the d matrix,
1986    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1987    In general, for PDE problems in which most nonzeros are near the diagonal,
1988    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1989    or you will get TERRIBLE performance; see the users' manual chapter on
1990    matrices.
1991 
1992    Level: intermediate
1993 
1994 .keywords: matrix, block, aij, compressed row, sparse, parallel
1995 
1996 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1997 @*/
1998 
1999 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2000 {
2001   int ierr,size;
2002 
2003   PetscFunctionBegin;
2004   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2005   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2006   if (size > 1) {
2007     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2008     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2009   } else {
2010     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2011     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2019 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2020 {
2021   Mat          mat;
2022   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2023   int          ierr,len=0;
2024 
2025   PetscFunctionBegin;
2026   *newmat       = 0;
2027   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2028   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2029   /* ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); -- cause error? */
2030   mat->factor       = matin->factor;
2031   mat->preallocated = PETSC_TRUE;
2032   mat->assembled    = PETSC_TRUE;
2033   a = (Mat_MPISBAIJ*)mat->data;
2034   a->bs  = oldmat->bs;
2035   a->bs2 = oldmat->bs2;
2036   a->mbs = oldmat->mbs;
2037   a->nbs = oldmat->nbs;
2038   a->Mbs = oldmat->Mbs;
2039   a->Nbs = oldmat->Nbs;
2040 
2041   a->rstart       = oldmat->rstart;
2042   a->rend         = oldmat->rend;
2043   a->cstart       = oldmat->cstart;
2044   a->cend         = oldmat->cend;
2045   a->size         = oldmat->size;
2046   a->rank         = oldmat->rank;
2047   a->donotstash   = oldmat->donotstash;
2048   a->roworiented  = oldmat->roworiented;
2049   a->rowindices   = 0;
2050   a->rowvalues    = 0;
2051   a->getrowactive = PETSC_FALSE;
2052   a->barray       = 0;
2053   a->rstart_bs    = oldmat->rstart_bs;
2054   a->rend_bs      = oldmat->rend_bs;
2055   a->cstart_bs    = oldmat->cstart_bs;
2056   a->cend_bs      = oldmat->cend_bs;
2057 
2058   /* hash table stuff */
2059   a->ht           = 0;
2060   a->hd           = 0;
2061   a->ht_size      = 0;
2062   a->ht_flag      = oldmat->ht_flag;
2063   a->ht_fact      = oldmat->ht_fact;
2064   a->ht_total_ct  = 0;
2065   a->ht_insert_ct = 0;
2066 
2067   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2068   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2069   a->cowners    = a->rowners + a->size + 2;
2070   a->rowners_bs = a->cowners + a->size + 2;
2071   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2072   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2073   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2074   if (oldmat->colmap) {
2075 #if defined (PETSC_USE_CTABLE)
2076     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2077 #else
2078     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2079     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2080     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2081 #endif
2082   } else a->colmap = 0;
2083   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2084     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2085     PetscLogObjectMemory(mat,len*sizeof(int));
2086     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2087   } else a->garray = 0;
2088 
2089   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2090   PetscLogObjectParent(mat,a->lvec);
2091   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2092   PetscLogObjectParent(mat,a->Mvctx);
2093 
2094   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2095   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2096   ierr =  VecDuplicate(oldmat->slvec0b,&a->slvec0b);CHKERRQ(ierr);
2097   ierr =  VecDuplicate(oldmat->slvec1a,&a->slvec1a);CHKERRQ(ierr);
2098   ierr =  VecDuplicate(oldmat->slvec1b,&a->slvec1b);CHKERRQ(ierr);
2099   ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx);CHKERRQ(ierr);
2100 
2101   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2102   PetscLogObjectParent(mat,a->A);
2103   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2104   PetscLogObjectParent(mat,a->B);
2105   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2106   *newmat = mat;
2107   PetscFunctionReturn(0);
2108 }
2109 
2110 #include "petscsys.h"
2111 
2112 #undef __FUNCT__
2113 #define __FUNCT__ "MatLoad_MPISBAIJ"
2114 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2115 {
2116   Mat          A;
2117   int          i,nz,ierr,j,rstart,rend,fd;
2118   PetscScalar  *vals,*buf;
2119   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2120   MPI_Status   status;
2121   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2122   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2123   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2124   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2125   int          dcount,kmax,k,nzcount,tmp;
2126 
2127   PetscFunctionBegin;
2128   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2129 
2130   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2131   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2132   if (!rank) {
2133     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2134     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2135     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2136     if (header[3] < 0) {
2137       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2138     }
2139   }
2140 
2141   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2142   M = header[1]; N = header[2];
2143 
2144   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2145 
2146   /*
2147      This code adds extra rows to make sure the number of rows is
2148      divisible by the blocksize
2149   */
2150   Mbs        = M/bs;
2151   extra_rows = bs - M + bs*(Mbs);
2152   if (extra_rows == bs) extra_rows = 0;
2153   else                  Mbs++;
2154   if (extra_rows &&!rank) {
2155     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2156   }
2157 
2158   /* determine ownership of all rows */
2159   mbs        = Mbs/size + ((Mbs % size) > rank);
2160   m          = mbs*bs;
2161   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2162   browners   = rowners + size + 1;
2163   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2164   rowners[0] = 0;
2165   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2166   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2167   rstart = rowners[rank];
2168   rend   = rowners[rank+1];
2169 
2170   /* distribute row lengths to all processors */
2171   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2172   if (!rank) {
2173     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2174     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2175     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2176     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2177     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2178     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2179     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2180   } else {
2181     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2182   }
2183 
2184   if (!rank) {   /* procs[0] */
2185     /* calculate the number of nonzeros on each processor */
2186     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2187     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2188     for (i=0; i<size; i++) {
2189       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2190         procsnz[i] += rowlengths[j];
2191       }
2192     }
2193     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2194 
2195     /* determine max buffer needed and allocate it */
2196     maxnz = 0;
2197     for (i=0; i<size; i++) {
2198       maxnz = PetscMax(maxnz,procsnz[i]);
2199     }
2200     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2201 
2202     /* read in my part of the matrix column indices  */
2203     nz     = procsnz[0];
2204     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2205     mycols = ibuf;
2206     if (size == 1)  nz -= extra_rows;
2207     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2208     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2209 
2210     /* read in every ones (except the last) and ship off */
2211     for (i=1; i<size-1; i++) {
2212       nz   = procsnz[i];
2213       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2214       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2215     }
2216     /* read in the stuff for the last proc */
2217     if (size != 1) {
2218       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2219       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2220       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2221       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2222     }
2223     ierr = PetscFree(cols);CHKERRQ(ierr);
2224   } else {  /* procs[i], i>0 */
2225     /* determine buffer space needed for message */
2226     nz = 0;
2227     for (i=0; i<m; i++) {
2228       nz += locrowlens[i];
2229     }
2230     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2231     mycols = ibuf;
2232     /* receive message of column indices*/
2233     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2234     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2235     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2236   }
2237 
2238   /* loop over local rows, determining number of off diagonal entries */
2239   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2240   odlens   = dlens + (rend-rstart);
2241   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2242   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2243   masked1  = mask    + Mbs;
2244   masked2  = masked1 + Mbs;
2245   rowcount = 0; nzcount = 0;
2246   for (i=0; i<mbs; i++) {
2247     dcount  = 0;
2248     odcount = 0;
2249     for (j=0; j<bs; j++) {
2250       kmax = locrowlens[rowcount];
2251       for (k=0; k<kmax; k++) {
2252         tmp = mycols[nzcount++]/bs; /* block col. index */
2253         if (!mask[tmp]) {
2254           mask[tmp] = 1;
2255           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2256           else masked1[dcount++] = tmp; /* entry in diag portion */
2257         }
2258       }
2259       rowcount++;
2260     }
2261 
2262     dlens[i]  = dcount;  /* d_nzz[i] */
2263     odlens[i] = odcount; /* o_nzz[i] */
2264 
2265     /* zero out the mask elements we set */
2266     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2267     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2268   }
2269 
2270   /* create our matrix */
2271   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2272   ierr = MatSetType(A,type);CHKERRQ(ierr);
2273   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2274   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2275 
2276   if (!rank) {
2277     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2278     /* read in my part of the matrix numerical values  */
2279     nz = procsnz[0];
2280     vals = buf;
2281     mycols = ibuf;
2282     if (size == 1)  nz -= extra_rows;
2283     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2284     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2285 
2286     /* insert into matrix */
2287     jj      = rstart*bs;
2288     for (i=0; i<m; i++) {
2289       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2290       mycols += locrowlens[i];
2291       vals   += locrowlens[i];
2292       jj++;
2293     }
2294 
2295     /* read in other processors (except the last one) and ship out */
2296     for (i=1; i<size-1; i++) {
2297       nz   = procsnz[i];
2298       vals = buf;
2299       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2300       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2301     }
2302     /* the last proc */
2303     if (size != 1){
2304       nz   = procsnz[i] - extra_rows;
2305       vals = buf;
2306       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2307       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2308       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2309     }
2310     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2311 
2312   } else {
2313     /* receive numeric values */
2314     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2315 
2316     /* receive message of values*/
2317     vals   = buf;
2318     mycols = ibuf;
2319     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2320     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2321     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2322 
2323     /* insert into matrix */
2324     jj      = rstart*bs;
2325     for (i=0; i<m; i++) {
2326       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2327       mycols += locrowlens[i];
2328       vals   += locrowlens[i];
2329       jj++;
2330     }
2331   }
2332 
2333   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2334   ierr = PetscFree(buf);CHKERRQ(ierr);
2335   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2336   ierr = PetscFree(rowners);CHKERRQ(ierr);
2337   ierr = PetscFree(dlens);CHKERRQ(ierr);
2338   ierr = PetscFree(mask);CHKERRQ(ierr);
2339   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2340   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2341   *newmat = A;
2342   PetscFunctionReturn(0);
2343 }
2344 
2345 #undef __FUNCT__
2346 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2347 /*@
2348    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2349 
2350    Input Parameters:
2351 .  mat  - the matrix
2352 .  fact - factor
2353 
2354    Collective on Mat
2355 
2356    Level: advanced
2357 
2358   Notes:
2359    This can also be set by the command line option: -mat_use_hash_table fact
2360 
2361 .keywords: matrix, hashtable, factor, HT
2362 
2363 .seealso: MatSetOption()
2364 @*/
2365 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2366 {
2367   PetscFunctionBegin;
2368   SETERRQ(1,"Function not yet written for SBAIJ format");
2369   /* PetscFunctionReturn(0); */
2370 }
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2374 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2375 {
2376   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2377   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2378   PetscReal    atmp;
2379   PetscReal    *work,*svalues,*rvalues;
2380   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2381   int          rank,size,*rowners_bs,dest,count,source;
2382   PetscScalar  *va;
2383   MatScalar    *ba;
2384   MPI_Status   stat;
2385 
2386   PetscFunctionBegin;
2387   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2388   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2389 
2390   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2391   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2392 
2393   bs   = a->bs;
2394   mbs  = a->mbs;
2395   Mbs  = a->Mbs;
2396   ba   = b->a;
2397   bi   = b->i;
2398   bj   = b->j;
2399   /*
2400   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2401   PetscSynchronizedFlush(A->comm);
2402   */
2403 
2404   /* find ownerships */
2405   rowners_bs = a->rowners_bs;
2406   /*
2407   if (!rank){
2408     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2409   }
2410   */
2411 
2412   /* each proc creates an array to be distributed */
2413   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2414   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2415 
2416   /* row_max for B */
2417   if (rank != size-1){
2418     for (i=0; i<mbs; i++) {
2419       ncols = bi[1] - bi[0]; bi++;
2420       brow  = bs*i;
2421       for (j=0; j<ncols; j++){
2422         bcol = bs*(*bj);
2423         for (kcol=0; kcol<bs; kcol++){
2424           col = bcol + kcol;                 /* local col index */
2425           col += rowners_bs[rank+1];      /* global col index */
2426           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2427           for (krow=0; krow<bs; krow++){
2428             atmp = PetscAbsScalar(*ba); ba++;
2429             row = brow + krow;    /* local row index */
2430             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2431             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2432             if (work[col] < atmp) work[col] = atmp;
2433           }
2434         }
2435         bj++;
2436       }
2437     }
2438     /*
2439       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2440       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2441       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2442       */
2443 
2444     /* send values to its owners */
2445     for (dest=rank+1; dest<size; dest++){
2446       svalues = work + rowners_bs[dest];
2447       count   = rowners_bs[dest+1]-rowners_bs[dest];
2448       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2449       /*
2450       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2451       PetscSynchronizedFlush(A->comm);
2452       */
2453     }
2454   }
2455 
2456   /* receive values */
2457   if (rank){
2458     rvalues = work;
2459     count   = rowners_bs[rank+1]-rowners_bs[rank];
2460     for (source=0; source<rank; source++){
2461       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2462       /* process values */
2463       for (i=0; i<count; i++){
2464         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2465       }
2466       /*
2467       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2468       PetscSynchronizedFlush(A->comm);
2469       */
2470     }
2471   }
2472 
2473   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2474   ierr = PetscFree(work);CHKERRQ(ierr);
2475   PetscFunctionReturn(0);
2476 }
2477 
2478 #undef __FUNCT__
2479 #define __FUNCT__ "MatRelax_MPISBAIJ"
2480 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2481 {
2482   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2483   int            ierr,mbs=mat->mbs,bs=mat->bs;
2484   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2485   Vec            bb1;
2486 
2487   PetscFunctionBegin;
2488   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2489   if (bs > 1)
2490     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2491 
2492   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2493     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2494       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2495       its--;
2496     }
2497 
2498     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2499     while (its--){
2500 
2501       /* lower triangular part: slvec0b = - B^T*xx */
2502       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2503 
2504       /* copy xx into slvec0a */
2505       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2506       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2507       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2508       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2509 
2510       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2511 
2512       /* copy bb into slvec1a */
2513       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2514       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2515       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2516       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2517 
2518       /* set slvec1b = 0 */
2519       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2520 
2521       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2522       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2523       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2524       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2525 
2526       /* upper triangular part: bb1 = bb1 - B*x */
2527       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2528 
2529       /* local diagonal sweep */
2530       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2531     }
2532     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2533   } else {
2534     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2535   }
2536   PetscFunctionReturn(0);
2537 }
2538 
2539 #undef __FUNCT__
2540 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2541 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2542 {
2543   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2544   int            ierr;
2545   PetscScalar    mone=-1.0;
2546   Vec            lvec1,bb1;
2547 
2548   PetscFunctionBegin;
2549   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2550   if (mat->bs > 1)
2551     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2552 
2553   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2554     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2555       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2556       its--;
2557     }
2558 
2559     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2560     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2561     while (its--){
2562       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2563 
2564       /* lower diagonal part: bb1 = bb - B^T*xx */
2565       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2566       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2567 
2568       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2569       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2570       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2571 
2572       /* upper diagonal part: bb1 = bb1 - B*x */
2573       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2574       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2575 
2576       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2577 
2578       /* diagonal sweep */
2579       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2580     }
2581     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2582     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2583   } else {
2584     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2585   }
2586   PetscFunctionReturn(0);
2587 }
2588 
2589