xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 3ab0aad5e591026ee6fbe8b8c4e8eb05f49d9dc6)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 extern int MatSetUpMultiply_MPISBAIJ(Mat);
8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the functions ..._MatScalar() do not exist as separate
26    functions; the names are #defined to the regular versions below. Otherwise ..._MatScalar()
27    takes matrix elements already in single (MatScalar) precision and inserts them into the
28    single precision data structure, while the wrapper MatSetValuesBlocked_MPISBAIJ() converts
29    the PetscScalar entries to single precision and then calls ..._MatScalar().
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
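/* With PETSC_USE_MAT_SINGLE, the public MatSetValues()/MatSetValuesBlocked() entry
   points defined below copy the caller's PetscScalar values into the reusable
   MatScalar buffer baij->setvaluescopy (grown on demand) and forward to the
   ..._MatScalar() variants declared above; otherwise the defines above alias the
   two sets of names, so no copy is made. */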
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
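/* Each time a block row runs out of preallocated space, the macros below enlarge
   the matrix by CHUNKSIZE block entries.  A rough cost sketch, assuming bs = 2
   (so bs2 = 4): one reallocation adds room for 10 blocks, i.e. 10 column indices
   plus 40 MatScalars, and copies the entire existing (i,j,a) storage over. */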
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
112         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
113         new_j = (int*)(new_a + bs2*new_nz); \
114         new_i = new_j + new_nz; \
115  \
116         /* copy over old data into new slots */ \
117         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
118         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
120         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
122         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
124         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
125                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
126         /* free up old matrix storage */ \
127         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
128         if (!a->singlemalloc) { \
129           ierr = PetscFree(a->i);CHKERRQ(ierr); \
130           ierr = PetscFree(a->j);CHKERRQ(ierr);\
131         } \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = PETSC_TRUE; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
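/* Note: the macro above (for the diagonal SBAIJ block A) and the one below (for the
   off-diagonal BAIJ block B) are textual templates, not functions.  They expect the
   caller to provide locals such as rp, ap, rmax, nrow, low, high, t, _i, ii, N, brow,
   bcol, ridx, cidx, bap and ierr (see MatSetValues_MPISBAIJ_MatScalar() below).  The
   while loop is a coarse binary search that narrows [low,high) to a few candidates,
   the for loop finds or inserts the block column, and a_noinsert/b_noinsert is the
   common exit label. */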
154 #ifndef MatSetValues_SeqSBAIJ_B_Private
155 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
156 { \
157     brow = row/bs;  \
158     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
159     rmax = bimax[brow]; nrow = bilen[brow]; \
160       bcol = col/bs; \
161       ridx = row % bs; cidx = col % bs; \
162       low = 0; high = nrow; \
163       while (high-low > 3) { \
164         t = (low+high)/2; \
165         if (rp[t] > bcol) high = t; \
166         else              low  = t; \
167       } \
168       for (_i=low; _i<high; _i++) { \
169         if (rp[_i] > bcol) break; \
170         if (rp[_i] == bcol) { \
171           bap  = ap +  bs2*_i + bs*cidx + ridx; \
172           if (addv == ADD_VALUES) *bap += value;  \
173           else                    *bap  = value;  \
174           goto b_noinsert; \
175         } \
176       } \
177       if (b->nonew == 1) goto b_noinsert; \
178       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
179       if (nrow >= rmax) { \
180         /* there is no extra room in row, therefore enlarge */ \
181         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
182         MatScalar *new_a; \
183  \
184         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
185  \
186         /* malloc new storage space */ \
187         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
188         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
189         new_j = (int*)(new_a + bs2*new_nz); \
190         new_i = new_j + new_nz; \
191  \
192         /* copy over old data into new slots */ \
193         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
194         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
195         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
196         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
197         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
198         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
201                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
202         /* free up old matrix storage */ \
203         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
204         if (!b->singlemalloc) { \
205           ierr = PetscFree(b->i);CHKERRQ(ierr); \
206           ierr = PetscFree(b->j);CHKERRQ(ierr); \
207         } \
208         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
209         b->singlemalloc = PETSC_TRUE; \
210  \
211         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
212         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
213         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
214         b->maxnz += bs2*CHUNKSIZE; \
215         b->reallocs++; \
216         b->nz++; \
217       } \
218       N = nrow++ - 1;  \
219       /* shift up all the later entries in this row */ \
220       for (ii=N; ii>=_i; ii--) { \
221         rp[ii+1] = rp[ii]; \
222         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223       } \
224       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
225       rp[_i]                      = bcol;  \
226       ap[bs2*_i + bs*cidx + ridx] = value;  \
227       b_noinsert:; \
228     bilen[brow] = nrow; \
229 }
230 #endif
231 
232 #if defined(PETSC_USE_MAT_SINGLE)
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValues_MPISBAIJ"
235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
238   int          ierr,i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
261   int          ierr,i,N = m*n*b->bs2;
262   MatScalar    *vsingle;
263 
264   PetscFunctionBegin;
265   if (N > b->setvalueslen) {
266     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
267     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
268     b->setvalueslen  = N;
269   }
270   vsingle = b->setvaluescopy;
271   for (i=0; i<N; i++) {
272     vsingle[i] = v[i];
273   }
274   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
281 {
282   PetscFunctionBegin;
283   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
288   /* PetscFunctionReturn(0); */
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
294 {
295   PetscFunctionBegin;
296   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
301   /* PetscFunctionReturn(0); */
302 }
303 #endif
304 
305 /* Only add/insert a(i,j) with i<=j (blocks).
306    Any a(i,j) with i>j input by the user is ignored.
307 */
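/* For example (a sketch, not compiled here), with bs = 1:

     int         rows[2] = {0,1},cols[2] = {0,1};
     PetscScalar vals[4] = {4.0,-1.0,-1.0,4.0};
     ierr = MatSetValues(mat,2,rows,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   stores only (0,0), (0,1) and (1,1); the lower triangular entry (1,0) is silently
   dropped, since the symmetric format keeps one copy of each off-diagonal pair. */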
308 #undef __FUNCT__
309 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
311 {
312   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
313   MatScalar    value;
314   PetscTruth   roworiented = baij->roworiented;
315   int          ierr,i,j,row,col;
316   int          rstart_orig=baij->rstart_bs;
317   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
318   int          cend_orig=baij->cend_bs,bs=baij->bs;
319 
320   /* Some Variables required in the macro */
321   Mat          A = baij->A;
322   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
323   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
324   MatScalar    *aa=a->a;
325 
326   Mat          B = baij->B;
327   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
328   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
329   MatScalar    *ba=b->a;
330 
331   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
332   int          low,high,t,ridx,cidx,bs2=a->bs2;
333   MatScalar    *ap,*bap;
334 
335   /* for stash */
336   int          n_loc, *in_loc=0;
337   MatScalar    *v_loc=0;
338 
339   PetscFunctionBegin;
340 
341   if(!baij->donotstash){
342     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
343     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
344   }
345 
346   for (i=0; i<m; i++) {
347     if (im[i] < 0) continue;
348 #if defined(PETSC_USE_BOPT_g)
349     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
350 #endif
351     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
352       row = im[i] - rstart_orig;              /* local row index */
353       for (j=0; j<n; j++) {
354         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
355         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
356           col = in[j] - cstart_orig;          /* local col index */
357           brow = row/bs; bcol = col/bs;
358           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
359           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
360           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
361           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
362         } else if (in[j] < 0) continue;
363 #if defined(PETSC_USE_BOPT_g)
364         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
365 #endif
366         else {  /* off-diag entry (B) */
367           if (mat->was_assembled) {
368             if (!baij->colmap) {
369               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
370             }
371 #if defined (PETSC_USE_CTABLE)
372             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
373             col  = col - 1;
374 #else
375             col = baij->colmap[in[j]/bs] - 1;
376 #endif
377             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
378               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
379               col =  in[j];
380               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
381               B = baij->B;
382               b = (Mat_SeqBAIJ*)(B)->data;
383               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
384               ba=b->a;
385             } else col += in[j]%bs;
386           } else col = in[j];
387           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
388           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
389           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
390         }
391       }
392     } else {  /* off processor entry */
393       if (!baij->donotstash) {
394         n_loc = 0;
395         for (j=0; j<n; j++){
396           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
397           in_loc[n_loc] = in[j];
398           if (roworiented) {
399             v_loc[n_loc] = v[i*n+j];
400           } else {
401             v_loc[n_loc] = v[j*m+i];
402           }
403           n_loc++;
404         }
405         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
406       }
407     }
408   }
409 
410   if(!baij->donotstash){
411     ierr = PetscFree(in_loc);CHKERRQ(ierr);
412     ierr = PetscFree(v_loc);CHKERRQ(ierr);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
420 {
421   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
422   const MatScalar *value;
423   MatScalar       *barray=baij->barray;
424   PetscTruth      roworiented = baij->roworiented;
425   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
426   int             rend=baij->rend,cstart=baij->cstart,stepval;
427   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
428 
429   PetscFunctionBegin;
430   if(!barray) {
431     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
432     baij->barray = barray;
433   }
434 
435   if (roworiented) {
436     stepval = (n-1)*bs;
437   } else {
438     stepval = (m-1)*bs;
439   }
440   for (i=0; i<m; i++) {
441     if (im[i] < 0) continue;
442 #if defined(PETSC_USE_BOPT_g)
443     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
444 #endif
445     if (im[i] >= rstart && im[i] < rend) {
446       row = im[i] - rstart;
447       for (j=0; j<n; j++) {
448         /* if n == 1 (row-oriented) or m == 1 (column-oriented) a copy is not required */
449         if ((roworiented) && (n == 1)) {
450           barray = (MatScalar*) v + i*bs2;
451         } else if((!roworiented) && (m == 1)) {
452           barray = (MatScalar*) v + j*bs2;
453         } else { /* Here a copy is required */
454           if (roworiented) {
455             value = v + i*(stepval+bs)*bs + j*bs;
456           } else {
457             value = v + j*(stepval+bs)*bs + i*bs;
458           }
459           for (ii=0; ii<bs; ii++,value+=stepval) {
460             for (jj=0; jj<bs; jj++) {
461               *barray++  = *value++;
462             }
463           }
464           barray -=bs2;
465         }
466 
467         if (in[j] >= cstart && in[j] < cend){
468           col  = in[j] - cstart;
469           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
470         }
471         else if (in[j] < 0) continue;
472 #if defined(PETSC_USE_BOPT_g)
473         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
474 #endif
475         else {
476           if (mat->was_assembled) {
477             if (!baij->colmap) {
478               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
479             }
480 
481 #if defined(PETSC_USE_BOPT_g)
482 #if defined (PETSC_USE_CTABLE)
483             { int data;
484               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
485               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
486             }
487 #else
488             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
489 #endif
490 #endif
491 #if defined (PETSC_USE_CTABLE)
492             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
493             col  = (col - 1)/bs;
494 #else
495             col = (baij->colmap[in[j]] - 1)/bs;
496 #endif
497             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
498               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
499               col =  in[j];
500             }
501           }
502           else col = in[j];
503           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
504         }
505       }
506     } else {
507       if (!baij->donotstash) {
508         if (roworiented) {
509           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
510         } else {
511           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
512         }
513       }
514     }
515   }
516   PetscFunctionReturn(0);
517 }
518 
519 #define HASH_KEY 0.6180339887
520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
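/* HASH() is multiplicative hashing with the golden-ratio constant: tmp holds
   key*0.618..., and the fractional part of tmp, scaled by the table size, selects
   the slot.  An illustrative (approximate) example: size = 97, key = 12345 gives
   tmp = 7629.63..., fractional part = 0.63..., slot = (int)(97*0.63...) = 61. */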
523 #undef __FUNCT__
524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
526 {
527   PetscFunctionBegin;
528   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
529   /* PetscFunctionReturn(0); */
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
535 {
536   PetscFunctionBegin;
537   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
538   /* PetscFunctionReturn(0); */
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatGetValues_MPISBAIJ"
543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
544 {
545   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
546   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
547   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
552     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
553     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
554       row = idxm[i] - bsrstart;
555       for (j=0; j<n; j++) {
556         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
557         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
558         if (idxn[j] >= bscstart && idxn[j] < bscend){
559           col = idxn[j] - bscstart;
560           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
561         } else {
562           if (!baij->colmap) {
563             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
564           }
565 #if defined (PETSC_USE_CTABLE)
566           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
567           data --;
568 #else
569           data = baij->colmap[idxn[j]/bs]-1;
570 #endif
571           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
572           else {
573             col  = data + idxn[j]%bs;
574             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575           }
576         }
577       }
578     } else {
579       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
580     }
581   }
582   PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatNorm_MPISBAIJ"
587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
588 {
589   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
590   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
591   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
592   int        ierr;
593   PetscReal  sum[2],*lnorm2;
594 
595   PetscFunctionBegin;
596   if (baij->size == 1) {
597     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
598   } else {
599     if (type == NORM_FROBENIUS) {
600       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
601       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
602       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
603       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
605       /*
606       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
607       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
608       */
609       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
610       /*
611       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
612       PetscSynchronizedFlush(mat->comm); */
613 
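      /* Only one triangle of the matrix is stored: assuming MatNorm() on the
         sequential SBAIJ block already counts each stored off-diagonal entry of A
         twice, and noting that every entry of B also appears transposed in the matrix
         but is stored only here, ||M||_F^2 = sum(||A_i||_F^2) + 2*sum(||B_i||_F^2). */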
614       *norm = sqrt(sum[0] + 2*sum[1]);
615       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 /*
624   Creates the hash table and fills in its entries.
625   This table is created only once.
626   If new entries need to be added to the matrix
627   then the hash table has to be destroyed and
628   recreated.
629 */
630 #undef __FUNCT__
631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
633 {
634   PetscFunctionBegin;
635   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
636   /* PetscFunctionReturn(0); */
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
642 {
643   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
644   int         ierr,nstash,reallocs;
645   InsertMode  addv;
646 
647   PetscFunctionBegin;
648   if (baij->donotstash) {
649     PetscFunctionReturn(0);
650   }
651 
652   /* make sure all processors are either in INSERTMODE or ADDMODE */
653   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
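  /* INSERT_VALUES and ADD_VALUES are distinct nonzero enum values, so the bitwise OR
     over all processes equals the common mode when everyone agrees; a mix sets the
     bits of both values and is caught by the test below. */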
654   if (addv == (ADD_VALUES|INSERT_VALUES)) {
655     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
656   }
657   mat->insertmode = addv; /* in case this processor had no cache */
658 
659   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
663   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
664   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
673   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
674   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
675   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
676   int         *row,*col,other_disassembled;
677   PetscTruth  r1,r2,r3;
678   MatScalar   *val;
679   InsertMode  addv = mat->insertmode;
680 
681   PetscFunctionBegin;
682 
683   if (!baij->donotstash) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       /*
687       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
688       PetscSynchronizedFlush(mat->comm);
689       */
690       if (!flg) break;
691 
692       for (i=0; i<n;) {
693         /* Now identify the consecutive vals belonging to the same row */
694         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
695         if (j < n) ncols = j-i;
696         else       ncols = n-i;
697         /* Now assemble all these values with a single function call */
698         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
699         i = j;
700       }
701     }
702     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
703     /* Now process the block-stash. Since the values are stashed column-oriented,
704        temporarily clear the row-oriented flags, and restore the original flags
705        after the MatSetValues() calls */
706     r1 = baij->roworiented;
707     r2 = a->roworiented;
708     r3 = b->roworiented;
709     baij->roworiented = PETSC_FALSE;
710     a->roworiented    = PETSC_FALSE;
711     b->roworiented    = PETSC_FALSE;
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n;) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
719         if (j < n) ncols = j-i;
720         else       ncols = n-i;
721         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
722         i = j;
723       }
724     }
725     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
726     baij->roworiented = r1;
727     a->roworiented    = r2;
728     b->roworiented    = r3;
729   }
730 
731   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746 
747   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
748     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
749   }
750   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
752 
753 #if defined(PETSC_USE_BOPT_g)
754   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
755     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
756     baij->ht_total_ct  = 0;
757     baij->ht_insert_ct = 0;
758   }
759 #endif
760   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
761     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
762     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
763     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
764   }
765 
766   if (baij->rowvalues) {
767     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
768     baij->rowvalues = 0;
769   }
770 
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
777 {
778   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
779   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
780   PetscTruth        isascii,isdraw;
781   PetscViewer       sviewer;
782   PetscViewerFormat format;
783 
784   PetscFunctionBegin;
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
786   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
787   if (isascii) {
788     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
789     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
790       MatInfo info;
791       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
793       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz allocated %d bs %d mem %d\n",
794               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
795               baij->bs,(int)info.memory);CHKERRQ(ierr);
796       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
797       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
798       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
799       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
800       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
801       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_INFO) {
804       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
805       PetscFunctionReturn(0);
806     }
807   }
808 
809   if (isdraw) {
810     PetscDraw       draw;
811     PetscTruth isnull;
812     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
813     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
814   }
815 
816   if (size == 1) {
817     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
818     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
819   } else {
820     /* assemble the entire matrix onto first processor. */
821     Mat         A;
822     Mat_SeqSBAIJ *Aloc;
823     Mat_SeqBAIJ *Bloc;
824     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
825     MatScalar   *a;
826 
827     /* Should this be the same type as mat? */
828     if (!rank) {
829       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
830     } else {
831       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
832     }
833     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
834     ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
835     PetscLogObjectParent(mat,A);
836 
837     /* copy over the A part */
838     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
839     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
840     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
841 
842     for (i=0; i<mbs; i++) {
843       rvals[0] = bs*(baij->rstart + i);
844       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
845       for (j=ai[i]; j<ai[i+1]; j++) {
846         col = (baij->cstart+aj[j])*bs;
847         for (k=0; k<bs; k++) {
848           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
849           col++; a += bs;
850         }
851       }
852     }
853     /* copy over the B part */
854     Bloc = (Mat_SeqBAIJ*)baij->B->data;
855     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
856     for (i=0; i<mbs; i++) {
857       rvals[0] = bs*(baij->rstart + i);
858       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
859       for (j=ai[i]; j<ai[i+1]; j++) {
860         col = baij->garray[aj[j]]*bs;
861         for (k=0; k<bs; k++) {
862           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
863           col++; a += bs;
864         }
865       }
866     }
867     ierr = PetscFree(rvals);CHKERRQ(ierr);
868     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
869     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
870     /*
871        Everyone has to call to draw the matrix since the graphics waits are
872        synchronized across all processors that share the PetscDraw object
873     */
874     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
875     if (!rank) {
876       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
877       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
878     }
879     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
880     ierr = MatDestroy(A);CHKERRQ(ierr);
881   }
882   PetscFunctionReturn(0);
883 }
884 
885 #undef __FUNCT__
886 #define __FUNCT__ "MatView_MPISBAIJ"
887 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
888 {
889   int        ierr;
890   PetscTruth isascii,isdraw,issocket,isbinary;
891 
892   PetscFunctionBegin;
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
894   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
895   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
896   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
897   if (isascii || isdraw || issocket || isbinary) {
898     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
899   } else {
900     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
901   }
902   PetscFunctionReturn(0);
903 }
904 
905 #undef __FUNCT__
906 #define __FUNCT__ "MatDestroy_MPISBAIJ"
907 int MatDestroy_MPISBAIJ(Mat mat)
908 {
909   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
910   int         ierr;
911 
912   PetscFunctionBegin;
913 #if defined(PETSC_USE_LOG)
914   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
915 #endif
916   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
917   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
918   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
919   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
920   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
921 #if defined (PETSC_USE_CTABLE)
922   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
923 #else
924   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
925 #endif
926   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
927   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
928   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
929   if (baij->slvec0) {
930     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
931     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
932   }
933   if (baij->slvec1) {
934     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
935     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
936     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
937   }
938   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
939   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
940   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
941   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
942 #if defined(PETSC_USE_MAT_SINGLE)
943   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
944 #endif
945   ierr = PetscFree(baij);CHKERRQ(ierr);
946   PetscFunctionReturn(0);
947 }
948 
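/* MatMult for the symmetric format, in outline.  Schematically the parallel matrix is
   M = [D U; U^T ...] where only the diagonal SBAIJ block D (in a->A) and the upper
   off-diagonal BAIJ part U (in a->B) are stored locally.  The code below forms
       slvec0 = [ x_local ; U^T x_local ]   and   slvec1 = [ D x_local ; 0 ],
   then a single scatter-add (sMvctx) simultaneously delivers the ghost values of x
   into slvec1b and accumulates the U^T x contributions into their owners' slvec1a,
   so that finally y = slvec1a + U*slvec1b.  Both triangles are thus applied with
   one communication. */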
949 #undef __FUNCT__
950 #define __FUNCT__ "MatMult_MPISBAIJ"
951 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
952 {
953   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
954   int         ierr,nt,mbs=a->mbs,bs=a->bs;
955   PetscScalar *x,*from,zero=0.0;
956 
957   PetscFunctionBegin;
958   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
959   if (nt != A->n) {
960     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
961   }
962   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
963   if (nt != A->m) {
964     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
965   }
966 
967   /* diagonal part */
968   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
969   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
970 
971   /* subdiagonal part */
972   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
973 
974   /* copy x into the vec slvec0 */
975   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
976   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
977   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
978   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
979 
980   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
981   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
982   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
983 
984   /* superdiagonal part */
985   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
986 
987   PetscFunctionReturn(0);
988 }
989 
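/* Alternative implementation kept for reference: instead of the combined scatter used
   above, this variant reuses the BAIJ-style context Mvctx twice - a forward scatter to
   bring in the ghost values of x for the U part, and a reverse scatter-add to send the
   U^T x contributions back to their owning processes. */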
990 #undef __FUNCT__
991 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
992 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
993 {
994   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
995   int         ierr,nt;
996 
997   PetscFunctionBegin;
998   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
999   if (nt != A->n) {
1000     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1001   }
1002   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1003   if (nt != A->m) {
1004     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1005   }
1006 
1007   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1008   /* do diagonal part */
1009   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1010   /* do superdiagonal part */
1011   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1012   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1013   /* do subdiagonal part */
1014   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1015   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1016   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1017 
1018   PetscFunctionReturn(0);
1019 }
1020 
1021 #undef __FUNCT__
1022 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1023 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1024 {
1025   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1026   int          ierr,mbs=a->mbs,bs=a->bs;
1027   PetscScalar  *x,*from,zero=0.0;
1028 
1029   PetscFunctionBegin;
1030   /*
1031   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1032   PetscSynchronizedFlush(A->comm);
1033   */
1034   /* diagonal part */
1035   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1036   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1037 
1038   /* subdiagonal part */
1039   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1040 
1041   /* copy x into the vec slvec0 */
1042   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1043   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1044   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1045   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1046 
1047   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1048   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1049   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1050 
1051   /* superdiagonal part */
1052   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1053 
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1059 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1060 {
1061   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1062   int        ierr;
1063 
1064   PetscFunctionBegin;
1065   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1066   /* do diagonal part */
1067   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1068   /* do superdiagonal part */
1069   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1070   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1071 
1072   /* do subdiagonal part */
1073   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1074   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1075   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1076 
1077   PetscFunctionReturn(0);
1078 }
1079 
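/* The matrix is symmetric, so A^T x = A x; the transpose operations simply
   delegate to MatMult() and MatMultAdd(). */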
1080 #undef __FUNCT__
1081 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1082 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1083 {
1084   int ierr;
1085 
1086   PetscFunctionBegin;
1087   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1088   PetscFunctionReturn(0);
1089 }
1090 
1091 #undef __FUNCT__
1092 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1093 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1094 {
1095   int ierr;
1096 
1097   PetscFunctionBegin;
1098   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1099   PetscFunctionReturn(0);
1100 }
1101 
1102 /*
1103   This only works correctly for square matrices where the subblock A->A is the
1104    diagonal block
1105 */
1106 #undef __FUNCT__
1107 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1108 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1109 {
1110   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1111   int         ierr;
1112 
1113   PetscFunctionBegin;
1114   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1115   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatScale_MPISBAIJ"
1121 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1122 {
1123   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1124   int         ierr;
1125 
1126   PetscFunctionBegin;
1127   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1128   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1129   PetscFunctionReturn(0);
1130 }
1131 
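/* MatGetRow returns the requested row with global column indices in increasing order.
   The row is merged from two sorted pieces: the diagonal-block part (A, columns offset
   by cstart*bs) and the off-diagonal part (B, block columns translated to global block
   columns through garray).  B entries whose global column precedes the diagonal block
   come first, then all A entries, then the remaining B entries. */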
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1134 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1135 {
1136   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1137   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1138   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1139   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1140   int            *cmap,*idx_p,cstart = mat->cstart;
1141 
1142   PetscFunctionBegin;
1143   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1144   mat->getrowactive = PETSC_TRUE;
1145 
1146   if (!mat->rowvalues && (idx || v)) {
1147     /*
1148         allocate enough space to hold information from the longest row.
1149     */
1150     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1151     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1152     int     max = 1,mbs = mat->mbs,tmp;
1153     for (i=0; i<mbs; i++) {
1154       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1155       if (max < tmp) { max = tmp; }
1156     }
1157     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1158     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1159   }
1160 
1161   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1162   lrow = row - brstart;  /* local row index */
1163 
1164   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1165   if (!v)   {pvA = 0; pvB = 0;}
1166   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1167   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1168   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1169   nztot = nzA + nzB;
1170 
1171   cmap  = mat->garray;
1172   if (v  || idx) {
1173     if (nztot) {
1174       /* Sort by increasing column numbers, assuming A and B already sorted */
1175       int imark = -1;
1176       if (v) {
1177         *v = v_p = mat->rowvalues;
1178         for (i=0; i<nzB; i++) {
1179           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1180           else break;
1181         }
1182         imark = i;
1183         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1184         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1185       }
1186       if (idx) {
1187         *idx = idx_p = mat->rowindices;
1188         if (imark > -1) {
1189           for (i=0; i<imark; i++) {
1190             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1191           }
1192         } else {
1193           for (i=0; i<nzB; i++) {
1194             if (cmap[cworkB[i]/bs] < cstart)
1195               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1196             else break;
1197           }
1198           imark = i;
1199         }
1200         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1201         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1202       }
1203     } else {
1204       if (idx) *idx = 0;
1205       if (v)   *v   = 0;
1206     }
1207   }
1208   *nz = nztot;
1209   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1210   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1211   PetscFunctionReturn(0);
1212 }
1213 
1214 #undef __FUNCT__
1215 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1216 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1217 {
1218   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1219 
1220   PetscFunctionBegin;
1221   if (baij->getrowactive == PETSC_FALSE) {
1222     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1223   }
1224   baij->getrowactive = PETSC_FALSE;
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1230 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1231 {
1232   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1233 
1234   PetscFunctionBegin;
1235   *bs = baij->bs;
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1241 int MatZeroEntries_MPISBAIJ(Mat A)
1242 {
1243   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1244   int         ierr;
1245 
1246   PetscFunctionBegin;
1247   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1248   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1254 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1255 {
1256   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1257   Mat         A = a->A,B = a->B;
1258   int         ierr;
1259   PetscReal   isend[5],irecv[5];
1260 
1261   PetscFunctionBegin;
1262   info->block_size     = (PetscReal)a->bs;
1263   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1264   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1265   isend[3] = info->memory;  isend[4] = info->mallocs;
1266   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1267   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1268   isend[3] += info->memory;  isend[4] += info->mallocs;
1269   if (flag == MAT_LOCAL) {
1270     info->nz_used      = isend[0];
1271     info->nz_allocated = isend[1];
1272     info->nz_unneeded  = isend[2];
1273     info->memory       = isend[3];
1274     info->mallocs      = isend[4];
1275   } else if (flag == MAT_GLOBAL_MAX) {
1276     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1277     info->nz_used      = irecv[0];
1278     info->nz_allocated = irecv[1];
1279     info->nz_unneeded  = irecv[2];
1280     info->memory       = irecv[3];
1281     info->mallocs      = irecv[4];
1282   } else if (flag == MAT_GLOBAL_SUM) {
1283     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1284     info->nz_used      = irecv[0];
1285     info->nz_allocated = irecv[1];
1286     info->nz_unneeded  = irecv[2];
1287     info->memory       = irecv[3];
1288     info->mallocs      = irecv[4];
1289   } else {
1290     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1291   }
1292   info->rows_global       = (PetscReal)matin->M;
1293   info->columns_global    = (PetscReal)matin->N;
1294   info->rows_local        = (PetscReal)matin->m;
1295   info->columns_local     = (PetscReal)matin->n;
1296   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1297   info->fill_ratio_needed = 0;
1298   info->factor_mallocs    = 0;
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1304 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1305 {
1306   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1307   int         ierr;
1308 
1309   PetscFunctionBegin;
1310   switch (op) {
1311   case MAT_NO_NEW_NONZERO_LOCATIONS:
1312   case MAT_YES_NEW_NONZERO_LOCATIONS:
1313   case MAT_COLUMNS_UNSORTED:
1314   case MAT_COLUMNS_SORTED:
1315   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1316   case MAT_KEEP_ZEROED_ROWS:
1317   case MAT_NEW_NONZERO_LOCATION_ERR:
1318     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1319     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1320     break;
1321   case MAT_ROW_ORIENTED:
1322     a->roworiented = PETSC_TRUE;
1323     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1324     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1325     break;
1326   case MAT_ROWS_SORTED:
1327   case MAT_ROWS_UNSORTED:
1328   case MAT_YES_NEW_DIAGONALS:
1329     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1330     break;
1331   case MAT_COLUMN_ORIENTED:
1332     a->roworiented = PETSC_FALSE;
1333     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1334     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1335     break;
1336   case MAT_IGNORE_OFF_PROC_ENTRIES:
1337     a->donotstash = PETSC_TRUE;
1338     break;
1339   case MAT_NO_NEW_DIAGONALS:
1340     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1341   case MAT_USE_HASH_TABLE:
1342     a->ht_flag = PETSC_TRUE;
1343     break;
1344   case MAT_NOT_SYMMETRIC:
1345   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1346   case MAT_HERMITIAN:
1347     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1348   case MAT_SYMMETRIC:
1349   case MAT_STRUCTURALLY_SYMMETRIC:
1350   case MAT_NOT_HERMITIAN:
1351   case MAT_SYMMETRY_ETERNAL:
1352   case MAT_NOT_SYMMETRY_ETERNAL:
1353     break;
1354   default:
1355     SETERRQ(PETSC_ERR_SUP,"unknown option");
1356   }
1357   PetscFunctionReturn(0);
1358 }
1359 
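/* The matrix is symmetric, so its transpose is produced by simply duplicating it. */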
1360 #undef __FUNCT__
1361 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1362 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1363 {
1364   int ierr;
1365   PetscFunctionBegin;
1366   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1367   PetscFunctionReturn(0);
1368 }
1369 
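/* Diagonal scaling of a symmetric matrix must itself be symmetric, D M D, which is why
   the left and right vectors are required to be identical.  The diagonal block is
   scaled on both sides; the off-diagonal block is left-scaled with the local vector
   and right-scaled with its scattered ghost values. */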
1370 #undef __FUNCT__
1371 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1372 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1373 {
1374   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1375   Mat         a = baij->A,b = baij->B;
1376   int         ierr,s1,s2,s3;
1377 
1378   PetscFunctionBegin;
1379   if (ll != rr) {
    SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1381   }
1382   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
  if (rr) {
    ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
    if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
    /* Overlap communication with computation. */
    ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);

    /* left scale the off-diagonal block */
    ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
    if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
    ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);

    /* scale the diagonal block */
    ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);

    /* complete the scatter, then right scale the off-diagonal block */
    ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
    ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
  }
1401 
1402   PetscFunctionReturn(0);
1403 }
1404 
1405 #undef __FUNCT__
1406 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1407 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1408 {
1409   PetscFunctionBegin;
1410   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1411 }
1412 
1413 #undef __FUNCT__
1414 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1415 int MatPrintHelp_MPISBAIJ(Mat A)
1416 {
1417   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1418   MPI_Comm    comm = A->comm;
1419   static int  called = 0;
1420   int         ierr;
1421 
1422   PetscFunctionBegin;
1423   if (!a->rank) {
1424     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1425   }
1426   if (called) {PetscFunctionReturn(0);} else called = 1;
1427   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1428   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1429   PetscFunctionReturn(0);
1430 }
1431 
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1434 int MatSetUnfactored_MPISBAIJ(Mat A)
1435 {
1436   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1437   int         ierr;
1438 
1439   PetscFunctionBegin;
1440   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1441   PetscFunctionReturn(0);
1442 }
1443 
1444 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1445 
1446 #undef __FUNCT__
1447 #define __FUNCT__ "MatEqual_MPISBAIJ"
1448 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1449 {
1450   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1451   Mat         a,b,c,d;
1452   PetscTruth  flg;
1453   int         ierr;
1454 
1455   PetscFunctionBegin;
1456   a = matA->A; b = matA->B;
1457   c = matB->A; d = matB->B;
1458 
1459   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1460   if (flg == PETSC_TRUE) {
1461     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1462   }
1463   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1464   PetscFunctionReturn(0);
1465 }
1466 
1467 #undef __FUNCT__
1468 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1469 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1470 {
1471   int        ierr;
1472 
1473   PetscFunctionBegin;
1474   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1475   PetscFunctionReturn(0);
1476 }
1477 
1478 #undef __FUNCT__
1479 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1480 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1481 {
1482   int        i,ierr;
1483   PetscTruth flg;

  PetscFunctionBegin;
1485   for (i=0; i<n; i++) {
1486     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1487     if (!flg) {
1488       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1489     }
1490   }
1491   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1492   PetscFunctionReturn(0);
1493 }
1494 
1495 
1496 /* -------------------------------------------------------------------*/
1497 static struct _MatOps MatOps_Values = {
1498        MatSetValues_MPISBAIJ,
1499        MatGetRow_MPISBAIJ,
1500        MatRestoreRow_MPISBAIJ,
1501        MatMult_MPISBAIJ,
1502 /* 4*/ MatMultAdd_MPISBAIJ,
1503        MatMultTranspose_MPISBAIJ,
1504        MatMultTransposeAdd_MPISBAIJ,
1505        0,
1506        0,
1507        0,
1508 /*10*/ 0,
1509        0,
1510        0,
1511        MatRelax_MPISBAIJ,
1512        MatTranspose_MPISBAIJ,
1513 /*15*/ MatGetInfo_MPISBAIJ,
1514        MatEqual_MPISBAIJ,
1515        MatGetDiagonal_MPISBAIJ,
1516        MatDiagonalScale_MPISBAIJ,
1517        MatNorm_MPISBAIJ,
1518 /*20*/ MatAssemblyBegin_MPISBAIJ,
1519        MatAssemblyEnd_MPISBAIJ,
1520        0,
1521        MatSetOption_MPISBAIJ,
1522        MatZeroEntries_MPISBAIJ,
1523 /*25*/ MatZeroRows_MPISBAIJ,
1524        0,
1525        0,
1526        0,
1527        0,
1528 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*35*/ MatDuplicate_MPISBAIJ,
1534        0,
1535        0,
1536        0,
1537        0,
1538 /*40*/ 0,
1539        MatGetSubMatrices_MPISBAIJ,
1540        MatIncreaseOverlap_MPISBAIJ,
1541        MatGetValues_MPISBAIJ,
1542        0,
1543 /*45*/ MatPrintHelp_MPISBAIJ,
1544        MatScale_MPISBAIJ,
1545        0,
1546        0,
1547        0,
1548 /*50*/ MatGetBlockSize_MPISBAIJ,
1549        0,
1550        0,
1551        0,
1552        0,
1553 /*55*/ 0,
1554        0,
1555        MatSetUnfactored_MPISBAIJ,
1556        0,
1557        MatSetValuesBlocked_MPISBAIJ,
1558 /*60*/ 0,
1559        0,
1560        0,
1561        MatGetPetscMaps_Petsc,
1562        0,
1563 /*65*/ 0,
1564        0,
1565        0,
1566        0,
1567        0,
1568 /*70*/ MatGetRowMax_MPISBAIJ,
1569        0,
1570        0,
1571        0,
1572        0,
1573 /*75*/ 0,
1574        0,
1575        0,
1576        0,
1577        0,
1578 /*80*/ 0,
1579        0,
1580        0,
1581        0,
1582 /*85*/ MatLoad_MPISBAIJ
1583 };
1584 
1585 
1586 EXTERN_C_BEGIN
1587 #undef __FUNCT__
1588 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1589 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1590 {
1591   PetscFunctionBegin;
1592   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1593   *iscopy = PETSC_FALSE;
1594   PetscFunctionReturn(0);
1595 }
1596 EXTERN_C_END
1597 
1598 EXTERN_C_BEGIN
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1601 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1602 {
1603   Mat_MPISBAIJ *b;
1604   int          ierr,i,mbs,Mbs;
1605 
1606   PetscFunctionBegin;
1607   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1608 
1609   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1610   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1611   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1612   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1613   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1614   if (d_nnz) {
1615     for (i=0; i<B->m/bs; i++) {
      if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1617     }
1618   }
1619   if (o_nnz) {
1620     for (i=0; i<B->m/bs; i++) {
      if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1622     }
1623   }
1624   B->preallocated = PETSC_TRUE;
1625   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1626   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1627   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
  ierr = PetscMapCreateMPI(B->comm,B->n,B->N,&B->cmap);CHKERRQ(ierr);
1629 
1630   b   = (Mat_MPISBAIJ*)B->data;
1631   mbs = B->m/bs;
1632   Mbs = B->M/bs;
1633   if (mbs*bs != B->m) {
1634     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1635   }
1636 
1637   b->bs  = bs;
1638   b->bs2 = bs*bs;
1639   b->mbs = mbs;
1640   b->nbs = mbs;
1641   b->Mbs = Mbs;
1642   b->Nbs = Mbs;
1643 
1644   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1645   b->rowners[0]    = 0;
1646   for (i=2; i<=b->size; i++) {
1647     b->rowners[i] += b->rowners[i-1];
1648   }
1649   b->rstart    = b->rowners[b->rank];
1650   b->rend      = b->rowners[b->rank+1];
1651   b->cstart    = b->rstart;
1652   b->cend      = b->rend;
1653   for (i=0; i<=b->size; i++) {
1654     b->rowners_bs[i] = b->rowners[i]*bs;
1655   }
  b->rstart_bs = b->rstart*bs;
1657   b->rend_bs   = b->rend*bs;
1658 
1659   b->cstart_bs = b->cstart*bs;
1660   b->cend_bs   = b->cend*bs;
1661 
1662   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1663   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1664   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1665   PetscLogObjectParent(B,b->A);
1666 
1667   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1668   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1669   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1670   PetscLogObjectParent(B,b->B);
1671 
  /* build cache for off-processor entries formed during assembly */
1673   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1674 
1675   PetscFunctionReturn(0);
1676 }
1677 EXTERN_C_END
1678 
1679 /*MC
1680    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1681    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1682 
1683    Options Database Keys:
1684 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1685 
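  Example:
  A minimal usage sketch (comm and the sizes m,n,M,N are assumed to be
  defined elsewhere; the preallocation values are illustrative only):

.vb
  Mat A;
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve
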
1686   Level: beginner
1687 
1688 .seealso: MatCreateMPISBAIJ
1689 M*/
1690 
1691 EXTERN_C_BEGIN
1692 #undef __FUNCT__
1693 #define __FUNCT__ "MatCreate_MPISBAIJ"
1694 int MatCreate_MPISBAIJ(Mat B)
1695 {
1696   Mat_MPISBAIJ *b;
1697   int          ierr;
1698   PetscTruth   flg;
1699 
1700   PetscFunctionBegin;
1701 
1702   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1703   B->data = (void*)b;
1704   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1705   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1706 
1707   B->ops->destroy    = MatDestroy_MPISBAIJ;
1708   B->ops->view       = MatView_MPISBAIJ;
1709   B->mapping    = 0;
1710   B->factor     = 0;
1711   B->assembled  = PETSC_FALSE;
1712 
1713   B->insertmode = NOT_SET_VALUES;
1714   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1715   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1716 
1717   /* build local table of row and column ownerships */
1718   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1719   b->cowners    = b->rowners + b->size + 2;
1720   b->rowners_bs = b->cowners + b->size + 2;
1721   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1722 
  /* build cache for off-processor entries formed during assembly */
1724   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1725   b->donotstash  = PETSC_FALSE;
1726   b->colmap      = PETSC_NULL;
1727   b->garray      = PETSC_NULL;
1728   b->roworiented = PETSC_TRUE;
1729 
1730 #if defined(PETSC_USE_MAT_SINGLE)
1731   /* stuff for MatSetValues_XXX in single precision */
1732   b->setvalueslen     = 0;
1733   b->setvaluescopy    = PETSC_NULL;
1734 #endif
1735 
1736   /* stuff used in block assembly */
1737   b->barray       = 0;
1738 
1739   /* stuff used for matrix vector multiply */
1740   b->lvec         = 0;
1741   b->Mvctx        = 0;
1742   b->slvec0       = 0;
1743   b->slvec0b      = 0;
1744   b->slvec1       = 0;
1745   b->slvec1a      = 0;
1746   b->slvec1b      = 0;
1747   b->sMvctx       = 0;
1748 
1749   /* stuff for MatGetRow() */
1750   b->rowindices   = 0;
1751   b->rowvalues    = 0;
1752   b->getrowactive = PETSC_FALSE;
1753 
1754   /* hash table stuff */
1755   b->ht           = 0;
1756   b->hd           = 0;
1757   b->ht_size      = 0;
1758   b->ht_flag      = PETSC_FALSE;
1759   b->ht_fact      = 0;
1760   b->ht_total_ct  = 0;
1761   b->ht_insert_ct = 0;
1762 
1763   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1764   if (flg) {
1765     PetscReal fact = 1.39;
1766     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1767     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1768     if (fact <= 1.0) fact = 1.39;
1769     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
    PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1771   }
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1773                                      "MatStoreValues_MPISBAIJ",
1774                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1776                                      "MatRetrieveValues_MPISBAIJ",
1777                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1779                                      "MatGetDiagonalBlock_MPISBAIJ",
1780                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1781   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1782                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1783                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1784   PetscFunctionReturn(0);
1785 }
1786 EXTERN_C_END
1787 
1788 /*MC
1789    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1790 
1791    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1792    and MATMPISBAIJ otherwise.
1793 
1794    Options Database Keys:
1795 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1796 
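  Example:
  A minimal sketch (comm and the sizes are assumed defined); on a single
  process this produces a MATSEQSBAIJ matrix, otherwise a MATMPISBAIJ one:

.vb
  Mat A;
  ierr = MatCreate(comm,m,n,M,N,&A);CHKERRQ(ierr);
  ierr = MatSetType(A,MATSBAIJ);CHKERRQ(ierr);
.ve
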
1797   Level: beginner
1798 
1799 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1800 M*/
1801 
1802 EXTERN_C_BEGIN
1803 #undef __FUNCT__
1804 #define __FUNCT__ "MatCreate_SBAIJ"
int MatCreate_SBAIJ(Mat A)
{
1806   int ierr,size;
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1810   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1811   if (size == 1) {
1812     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1813   } else {
1814     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1815   }
1816   PetscFunctionReturn(0);
1817 }
1818 EXTERN_C_END
1819 
1820 #undef __FUNCT__
1821 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1822 /*@C
1823    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1824    the user should preallocate the matrix storage by setting the parameters
1825    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1826    performance can be increased by more than a factor of 50.
1827 
1828    Collective on Mat
1829 
1830    Input Parameters:
1831 +  A - the matrix
.  bs   - size of block
1833 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1834            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
1839 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1840            submatrix (same for all local rows).
1841 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1842            off-diagonal portion of the local submatrix (possibly different for
1843            each block row) or PETSC_NULL.
1844 
1845 
1846    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
-   -mat_block_size - size of the blocks to use
1850 
1851    Notes:
1852 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1855 
1856    Storage Information:
1857    For a square global matrix we define each processor's diagonal portion
1858    to be its local rows and the corresponding columns (a square submatrix);
1859    each processor's off-diagonal portion encompasses the remainder of the
1860    local matrix (a rectangular submatrix).
1861 
1862    The user can specify preallocated storage for the diagonal part of
1863    the local submatrix with either d_nz or d_nnz (not both).  Set
1864    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1865    memory allocation.  Likewise, specify preallocated storage for the
1866    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1867 
1868    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1869    the figure below we depict these three local rows and all columns (0-11).
1870 
1871 .vb
1872            0 1 2 3 4 5 6 7 8 9 10 11
1873           -------------------
1874    row 3  |  o o o d d d o o o o o o
1875    row 4  |  o o o d d d o o o o o o
1876    row 5  |  o o o d d d o o o o o o
1877           -------------------
1878 .ve
1879 
1880    Thus, any entries in the d locations are stored in the d (diagonal)
1881    submatrix, and any entries in the o locations are stored in the
1882    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1883    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1884 
1885    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1886    plus the diagonal part of the d matrix,
1887    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1888    In general, for PDE problems in which most nonzeros are near the diagonal,
1889    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1890    or you will get TERRIBLE performance; see the users' manual chapter on
1891    matrices.
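
   Example:
   As a minimal sketch (the values are illustrative only), a matrix with
   block size 3 whose block rows need at most 2 blocks in the upper
   triangular part of the d matrix and 1 block in the o matrix could be
   preallocated with

.vb
   ierr = MatMPISBAIJSetPreallocation(B,3,2,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
.ve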
1892 
1893    Level: intermediate
1894 
1895 .keywords: matrix, block, aij, compressed row, sparse, parallel
1896 
1897 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1898 @*/
1899 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1900 {
1901   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1902 
1903   PetscFunctionBegin;
1904   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1905   if (f) {
1906     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1907   }
1908   PetscFunctionReturn(0);
1909 }
1910 
1911 #undef __FUNCT__
1912 #define __FUNCT__ "MatCreateMPISBAIJ"
1913 /*@C
1914    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1915    (block compressed row).  For good matrix assembly performance
1916    the user should preallocate the matrix storage by setting the parameters
1917    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1918    performance can be increased by more than a factor of 50.
1919 
1920    Collective on MPI_Comm
1921 
1922    Input Parameters:
1923 +  comm - MPI communicator
.  bs   - size of block
1925 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1926            This value should be the same as the local size used in creating the
1927            y vector for the matrix-vector product y = Ax.
1928 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1929            This value should be the same as the local size used in creating the
1930            x vector for the matrix-vector product y = Ax.
1931 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1932 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1933 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1934            submatrix  (same for all local rows)
.  d_nnz - array containing the number of block nonzeros in the various block rows
           in the upper triangular and diagonal part of the diagonal portion of the
           local submatrix (possibly different for each block row) or PETSC_NULL.
           You must leave room for the diagonal entry even if it is zero.
1939 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1940            submatrix (same for all local rows).
1941 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1942            off-diagonal portion of the local submatrix (possibly different for
1943            each block row) or PETSC_NULL.
1944 
1945    Output Parameter:
1946 .  A - the matrix
1947 
1948    Options Database Keys:
+   -mat_no_unroll - uses code that does not unroll the loops in the
                     block calculations (much slower)
.   -mat_block_size - size of the blocks to use
-   -mat_mpi - use the parallel matrix data structures even on one processor
               (defaults to using SeqSBAIJ format on one processor)
1954 
1955    Notes:
1956    The user MUST specify either the local or global matrix dimensions
1957    (possibly both).
1958 
   If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
   then it must be used on all processors that share the object for that argument.
1961 
1962    Storage Information:
1963    For a square global matrix we define each processor's diagonal portion
1964    to be its local rows and the corresponding columns (a square submatrix);
1965    each processor's off-diagonal portion encompasses the remainder of the
1966    local matrix (a rectangular submatrix).
1967 
1968    The user can specify preallocated storage for the diagonal part of
1969    the local submatrix with either d_nz or d_nnz (not both).  Set
1970    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1971    memory allocation.  Likewise, specify preallocated storage for the
1972    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1973 
1974    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1975    the figure below we depict these three local rows and all columns (0-11).
1976 
1977 .vb
1978            0 1 2 3 4 5 6 7 8 9 10 11
1979           -------------------
1980    row 3  |  o o o d d d o o o o o o
1981    row 4  |  o o o d d d o o o o o o
1982    row 5  |  o o o d d d o o o o o o
1983           -------------------
1984 .ve
1985 
1986    Thus, any entries in the d locations are stored in the d (diagonal)
1987    submatrix, and any entries in the o locations are stored in the
1988    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1989    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1990 
1991    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1992    plus the diagonal part of the d matrix,
1993    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1994    In general, for PDE problems in which most nonzeros are near the diagonal,
1995    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1996    or you will get TERRIBLE performance; see the users' manual chapter on
1997    matrices.
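
   Example:
   A minimal sketch (sizes and preallocation values are illustrative only)
   creating a 100x100 matrix with block size 2 on PETSC_COMM_WORLD:

.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve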
1998 
1999    Level: intermediate
2000 
2001 .keywords: matrix, block, aij, compressed row, sparse, parallel
2002 
2003 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2004 @*/
2005 
2006 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2007 {
2008   int ierr,size;
2009 
2010   PetscFunctionBegin;
2011   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2012   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2013   if (size > 1) {
2014     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2015     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2016   } else {
2017     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2018     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2019   }
2020   PetscFunctionReturn(0);
2021 }
2022 
2023 
2024 #undef __FUNCT__
2025 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2026 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2027 {
2028   Mat          mat;
2029   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2030   int          ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2031   PetscScalar  *array;
2032 
2033   PetscFunctionBegin;
2034   *newmat       = 0;
2035   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2036   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2037   ierr              = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2038   mat->factor       = matin->factor;
2039   mat->preallocated = PETSC_TRUE;
2040   mat->assembled    = PETSC_TRUE;
2041   mat->insertmode   = NOT_SET_VALUES;
2042 
2043   a = (Mat_MPISBAIJ*)mat->data;
2044   a->bs  = oldmat->bs;
2045   a->bs2 = oldmat->bs2;
2046   a->mbs = oldmat->mbs;
2047   a->nbs = oldmat->nbs;
2048   a->Mbs = oldmat->Mbs;
2049   a->Nbs = oldmat->Nbs;
2050 
2051   a->rstart       = oldmat->rstart;
2052   a->rend         = oldmat->rend;
2053   a->cstart       = oldmat->cstart;
2054   a->cend         = oldmat->cend;
2055   a->size         = oldmat->size;
2056   a->rank         = oldmat->rank;
2057   a->donotstash   = oldmat->donotstash;
2058   a->roworiented  = oldmat->roworiented;
2059   a->rowindices   = 0;
2060   a->rowvalues    = 0;
2061   a->getrowactive = PETSC_FALSE;
2062   a->barray       = 0;
2063   a->rstart_bs    = oldmat->rstart_bs;
2064   a->rend_bs      = oldmat->rend_bs;
2065   a->cstart_bs    = oldmat->cstart_bs;
2066   a->cend_bs      = oldmat->cend_bs;
2067 
2068   /* hash table stuff */
2069   a->ht           = 0;
2070   a->hd           = 0;
2071   a->ht_size      = 0;
2072   a->ht_flag      = oldmat->ht_flag;
2073   a->ht_fact      = oldmat->ht_fact;
2074   a->ht_total_ct  = 0;
2075   a->ht_insert_ct = 0;
2076 
2077   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2078   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2079   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2080   if (oldmat->colmap) {
2081 #if defined (PETSC_USE_CTABLE)
2082     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2083 #else
2084     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2085     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2086     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2087 #endif
2088   } else a->colmap = 0;
2089 
2090   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2091     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2092     PetscLogObjectMemory(mat,len*sizeof(int));
2093     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2094   } else a->garray = 0;
2095 
2096   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2097   PetscLogObjectParent(mat,a->lvec);
2098   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2099   PetscLogObjectParent(mat,a->Mvctx);
2100 
2101   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2102   PetscLogObjectParent(mat,a->slvec0);
2103   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2104   PetscLogObjectParent(mat,a->slvec1);
2105 
2106   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2107   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2108   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2109   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2110   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2111   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2113   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2116   PetscLogObjectParent(mat,a->slvec0b);
2117   PetscLogObjectParent(mat,a->slvec1a);
2118   PetscLogObjectParent(mat,a->slvec1b);
2119 
2120   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2121   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2122   a->sMvctx = oldmat->sMvctx;
2123   PetscLogObjectParent(mat,a->sMvctx);
2124 
2125   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2126   PetscLogObjectParent(mat,a->A);
2127   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2128   PetscLogObjectParent(mat,a->B);
  ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2130   *newmat = mat;
2131   PetscFunctionReturn(0);
2132 }
2133 
2134 #include "petscsys.h"
2135 
2136 #undef __FUNCT__
2137 #define __FUNCT__ "MatLoad_MPISBAIJ"
2138 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2139 {
2140   Mat          A;
2141   int          i,nz,ierr,j,rstart,rend,fd;
2142   PetscScalar  *vals,*buf;
2143   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2144   MPI_Status   status;
2145   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2146   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2147   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2148   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2149   int          dcount,kmax,k,nzcount,tmp;
2150 
2151   PetscFunctionBegin;
2152   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2153 
2154   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2155   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2156   if (!rank) {
2157     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2158     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
    if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2160     if (header[3] < 0) {
2161       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2162     }
2163   }
2164 
2165   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2166   M = header[1]; N = header[2];
2167 
2168   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2169 
2170   /*
2171      This code adds extra rows to make sure the number of rows is
2172      divisible by the blocksize
2173   */
2174   Mbs        = M/bs;
  extra_rows = bs - M + bs*Mbs;
  if (extra_rows == bs) extra_rows = 0;
  else                  Mbs++;
  if (extra_rows && !rank) {
2179     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2180   }
2181 
2182   /* determine ownership of all rows */
2183   mbs        = Mbs/size + ((Mbs % size) > rank);
2184   m          = mbs*bs;
2185   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2186   browners   = rowners + size + 1;
2187   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2188   rowners[0] = 0;
2189   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2190   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2191   rstart = rowners[rank];
2192   rend   = rowners[rank+1];
2193 
2194   /* distribute row lengths to all processors */
2195   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2196   if (!rank) {
2197     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2198     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2199     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2200     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2201     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2202     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2203     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2204   } else {
2205     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2206   }
2207 
2208   if (!rank) {   /* procs[0] */
2209     /* calculate the number of nonzeros on each processor */
2210     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2211     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2212     for (i=0; i<size; i++) {
2213       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2214         procsnz[i] += rowlengths[j];
2215       }
2216     }
2217     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2218 
2219     /* determine max buffer needed and allocate it */
2220     maxnz = 0;
2221     for (i=0; i<size; i++) {
2222       maxnz = PetscMax(maxnz,procsnz[i]);
2223     }
2224     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2225 
2226     /* read in my part of the matrix column indices  */
2227     nz     = procsnz[0];
2228     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2229     mycols = ibuf;
2230     if (size == 1)  nz -= extra_rows;
2231     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2232     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2233 
    /* read in the column indices for each proc (except the last) and ship them off */
2235     for (i=1; i<size-1; i++) {
2236       nz   = procsnz[i];
2237       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2238       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2239     }
2240     /* read in the stuff for the last proc */
2241     if (size != 1) {
2242       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2243       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2244       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2245       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2246     }
2247     ierr = PetscFree(cols);CHKERRQ(ierr);
2248   } else {  /* procs[i], i>0 */
2249     /* determine buffer space needed for message */
2250     nz = 0;
2251     for (i=0; i<m; i++) {
2252       nz += locrowlens[i];
2253     }
2254     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2255     mycols = ibuf;
    /* receive message of column indices */
2257     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2258     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2259     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2260   }
2261 
2262   /* loop over local rows, determining number of off diagonal entries */
2263   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2264   odlens   = dlens + (rend-rstart);
2265   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2266   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2267   masked1  = mask    + Mbs;
2268   masked2  = masked1 + Mbs;
2269   rowcount = 0; nzcount = 0;
2270   for (i=0; i<mbs; i++) {
2271     dcount  = 0;
2272     odcount = 0;
2273     for (j=0; j<bs; j++) {
2274       kmax = locrowlens[rowcount];
2275       for (k=0; k<kmax; k++) {
2276         tmp = mycols[nzcount++]/bs; /* block col. index */
2277         if (!mask[tmp]) {
2278           mask[tmp] = 1;
2279           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2280           else masked1[dcount++] = tmp; /* entry in diag portion */
2281         }
2282       }
2283       rowcount++;
2284     }
2285 
2286     dlens[i]  = dcount;  /* d_nzz[i] */
2287     odlens[i] = odcount; /* o_nzz[i] */
2288 
2289     /* zero out the mask elements we set */
2290     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2291     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2292   }
2293 
2294   /* create our matrix */
2295   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2296   ierr = MatSetType(A,type);CHKERRQ(ierr);
2297   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2298   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2299 
2300   if (!rank) {
2301     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2302     /* read in my part of the matrix numerical values  */
2303     nz = procsnz[0];
2304     vals = buf;
2305     mycols = ibuf;
2306     if (size == 1)  nz -= extra_rows;
2307     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2308     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2309 
2310     /* insert into matrix */
2311     jj      = rstart*bs;
2312     for (i=0; i<m; i++) {
2313       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2314       mycols += locrowlens[i];
2315       vals   += locrowlens[i];
2316       jj++;
2317     }
2318 
2319     /* read in other processors (except the last one) and ship out */
2320     for (i=1; i<size-1; i++) {
2321       nz   = procsnz[i];
2322       vals = buf;
2323       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2324       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2325     }
2326     /* the last proc */
    if (size != 1) {
      nz   = procsnz[size-1] - extra_rows;
2329       vals = buf;
2330       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2331       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2332       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2333     }
2334     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2335 
2336   } else {
2337     /* receive numeric values */
2338     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2339 
    /* receive message of values */
2341     vals   = buf;
2342     mycols = ibuf;
2343     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2344     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2345     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2346 
2347     /* insert into matrix */
2348     jj      = rstart*bs;
2349     for (i=0; i<m; i++) {
2350       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2351       mycols += locrowlens[i];
2352       vals   += locrowlens[i];
2353       jj++;
2354     }
2355   }
2356 
2357   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2358   ierr = PetscFree(buf);CHKERRQ(ierr);
2359   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2360   ierr = PetscFree(rowners);CHKERRQ(ierr);
2361   ierr = PetscFree(dlens);CHKERRQ(ierr);
2362   ierr = PetscFree(mask);CHKERRQ(ierr);
2363   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2364   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2365   *newmat = A;
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2371 /*@
   MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2373 
2374    Input Parameters:
+  mat  - the matrix
-  fact - factor
2377 
2378    Collective on Mat
2379 
2380    Level: advanced
2381 
2382   Notes:
2383    This can also be set by the command line option: -mat_use_hash_table fact
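
   For example (a sketch; the executable name ex1 is a placeholder):

.vb
   mpirun -np 4 ./ex1 -mat_type mpisbaij -mat_use_hash_table 1.5
.ve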
2384 
2385 .keywords: matrix, hashtable, factor, HT
2386 
2387 .seealso: MatSetOption()
2388 @*/
2389 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2390 {
2391   PetscFunctionBegin;
2392   SETERRQ(1,"Function not yet written for SBAIJ format");
2393   /* PetscFunctionReturn(0); */
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2398 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2399 {
2400   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2401   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2402   PetscReal    atmp;
2403   PetscReal    *work,*svalues,*rvalues;
2404   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2405   int          rank,size,*rowners_bs,dest,count,source;
2406   PetscScalar  *va;
2407   MatScalar    *ba;
2408   MPI_Status   stat;
2409 
2410   PetscFunctionBegin;
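  /*
     Sketch of the approach: only the upper triangle is stored, so the
     maximum over a row also depends on entries held by processes of
     smaller rank (as part of their off-diagonal block B). Each process
     records the column maxima of its B part in work[] and ships them to
     the owning processes, which fold them into v.
  */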
2411   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2412   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2413 
2414   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2415   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2416 
2417   bs   = a->bs;
2418   mbs  = a->mbs;
2419   Mbs  = a->Mbs;
2420   ba   = b->a;
2421   bi   = b->i;
2422   bj   = b->j;
2423   /*
2424   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2425   PetscSynchronizedFlush(A->comm);
2426   */
2427 
2428   /* find ownerships */
2429   rowners_bs = a->rowners_bs;
2430   /*
2431   if (!rank){
2432     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2433   }
2434   */
2435 
2436   /* each proc creates an array to be distributed */
2437   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2438   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2439 
2440   /* row_max for B */
2441   if (rank != size-1){
2442     for (i=0; i<mbs; i++) {
2443       ncols = bi[1] - bi[0]; bi++;
2444       brow  = bs*i;
2445       for (j=0; j<ncols; j++){
2446         bcol = bs*(*bj);
2447         for (kcol=0; kcol<bs; kcol++){
2448           col = bcol + kcol;                 /* local col index */
2449           col += rowners_bs[rank+1];      /* global col index */
2450           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2451           for (krow=0; krow<bs; krow++){
2452             atmp = PetscAbsScalar(*ba); ba++;
2453             row = brow + krow;    /* local row index */
2454             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2455             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2456             if (work[col] < atmp) work[col] = atmp;
2457           }
2458         }
2459         bj++;
2460       }
2461     }
2462     /*
2463       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2464       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2465       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2466       */
2467 
2468     /* send values to its owners */
2469     for (dest=rank+1; dest<size; dest++){
2470       svalues = work + rowners_bs[dest];
2471       count   = rowners_bs[dest+1]-rowners_bs[dest];
2472       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2473       /*
2474       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2475       PetscSynchronizedFlush(A->comm);
2476       */
2477     }
2478   }
2479 
2480   /* receive values */
2481   if (rank){
2482     rvalues = work;
2483     count   = rowners_bs[rank+1]-rowners_bs[rank];
2484     for (source=0; source<rank; source++){
2485       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2486       /* process values */
2487       for (i=0; i<count; i++){
2488         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2489       }
2490       /*
2491       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2492       PetscSynchronizedFlush(A->comm);
2493       */
2494     }
2495   }
2496 
2497   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2498   ierr = PetscFree(work);CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatRelax_MPISBAIJ"
2504 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2505 {
2506   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2507   int            ierr,mbs=mat->mbs,bs=mat->bs;
2508   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2509   Vec            bb1;
2510 
2511   PetscFunctionBegin;
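  /*
     Sketch of the local symmetric sweep (bs=1 only): with the splitting
     A = D + B + B^T, where D is this process's diagonal block and B its
     off-diagonal block, each iteration assembles bb1 = bb - B*x - B^T*x
     through the slvec0/slvec1 scatter and then applies the sequential
     symmetric sweep to D.
  */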
2512   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2513   if (bs > 1)
2514     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2515 
2516   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2517     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2518       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2519       its--;
2520     }
2521 
2522     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2523     while (its--){
2524 
      /* lower triangular part: slvec0b = B^T*xx (negated below via VecScale) */
2526       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2527 
2528       /* copy xx into slvec0a */
2529       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2530       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2532       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2533 
2534       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2535 
2536       /* copy bb into slvec1a */
2537       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2538       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
      ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2540       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2541 
2542       /* set slvec1b = 0 */
2543       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2544 
2545       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2546       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2547       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2548       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2549 
      /* upper triangular part: bb1 = slvec1a + B*slvec1b */
2551       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2552 
2553       /* local diagonal sweep */
2554       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2555     }
2556     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2557   } else {
2558     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2559   }
2560   PetscFunctionReturn(0);
2561 }
2562 
2563 #undef __FUNCT__
2564 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2565 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2566 {
2567   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2568   int            ierr;
2569   PetscScalar    mone=-1.0;
2570   Vec            lvec1,bb1;
2571 
2572   PetscFunctionBegin;
2573   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2574   if (mat->bs > 1)
2575     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2576 
2577   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2578     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2579       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2580       its--;
2581     }
2582 
2583     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2584     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2585     while (its--){
2586       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2587 
2588       /* lower diagonal part: bb1 = bb - B^T*xx */
2589       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2590       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2591 
2592       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2593       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2594       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2595 
2596       /* upper diagonal part: bb1 = bb1 - B*x */
2597       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2598       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2599 
2600       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2601 
2602       /* diagonal sweep */
2603       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2604     }
2605     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2606     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2607   } else {
2608     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2609   }
2610   PetscFunctionReturn(0);
2611 }
2612 
2613