xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision fb10cecf1927e102c85c17a39b04f58da85ca3ce)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single-precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single-precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
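/*
   A minimal sketch of this wrapper pattern (illustrative; the hypothetical
   name SetValuesWrapper stands in for the real MPISBAIJ wrappers that appear
   further below under PETSC_USE_MAT_SINGLE): copy the PetscScalar input into
   a cached MatScalar buffer, then forward to the ..._MatScalar() worker.

     int SetValuesWrapper(Mat mat,int n,const PetscScalar v[],InsertMode addv)
     {
       Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
       MatScalar    *vsingle;
       int          i,ierr;

       if (n > b->setvalueslen) {
         if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
         ierr = PetscMalloc(n*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
         b->setvalueslen = n;
       }
       vsingle = b->setvaluescopy;
       for (i=0; i<n; i++) vsingle[i] = v[i];
       return SetValuesWrapper_MatScalar(mat,n,vsingle,addv);
     }
*/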
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non-scalable way since the
79    length of colmap equals the number of block columns of the global matrix.
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
83 static int CreateColmap_MPISBAIJ_Private(Mat mat)
84 {
85   PetscFunctionBegin;
86   SETERRQ(1,"Function not yet written for SBAIJ format");
87   /* PetscFunctionReturn(0); */
88 }
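/*
   A sketch of what the missing body could look like, modeled on
   CreateColmap_MPIBAIJ_Private (an assumption, not committed code):
   colmap[c] holds 1 + the first local column of global block column c
   in B, with 0 meaning "not present locally".

     Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
     Mat_SeqBAIJ  *B    = (Mat_SeqBAIJ*)baij->B->data;
     int          ierr,i;

     ierr = PetscMalloc(baij->Nbs*sizeof(int),&baij->colmap);CHKERRQ(ierr);
     ierr = PetscMemzero(baij->colmap,baij->Nbs*sizeof(int));CHKERRQ(ierr);
     for (i=0; i<B->nbs; i++) baij->colmap[baij->garray[i]] = i*baij->bs+1;
*/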
89 
90 #define CHUNKSIZE  10
91 
92 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
93 { \
94  \
95     brow = row/bs;  \
96     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
97     rmax = aimax[brow]; nrow = ailen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto a_noinsert; \
113         } \
114       } \
115       if (a->nonew == 1) goto a_noinsert; \
116       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
117       if (nrow >= rmax) { \
118         /* there is no extra room in row, therefore enlarge */ \
119         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
120         MatScalar *new_a; \
121  \
122         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
123  \
124         /* malloc new storage space */ \
125         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
126         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
127         new_j = (int*)(new_a + bs2*new_nz); \
128         new_i = new_j + new_nz; \
129  \
130         /* copy over old data into new slots */ \
131         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
132         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
133         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
134         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
135         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
137         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
138         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
139                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
140         /* free up old matrix storage */ \
141         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
142         if (!a->singlemalloc) { \
143           ierr = PetscFree(a->i);CHKERRQ(ierr); \
144           ierr = PetscFree(a->j);CHKERRQ(ierr);\
145         } \
146         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
147         a->singlemalloc = PETSC_TRUE; \
148  \
149         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
151         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
152         a->s_maxnz += bs2*CHUNKSIZE; \
153         a->reallocs++; \
154         a->s_nz++; \
155       } \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 #ifndef MatSetValues_SeqSBAIJ_B_Private
169 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
193       if (nrow >= rmax) { \
194         /* there is no extra room in row, therefore enlarge */ \
195         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
196         MatScalar *new_a; \
197  \
198         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
199  \
200         /* malloc new storage space */ \
201         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
202         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
203         new_j = (int*)(new_a + bs2*new_nz); \
204         new_i = new_j + new_nz; \
205  \
206         /* copy over old data into new slots */ \
207         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
208         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
209         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
210         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
211         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
213         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
214         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
215                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
216         /* free up old matrix storage */ \
217         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
218         if (!b->singlemalloc) { \
219           ierr = PetscFree(b->i);CHKERRQ(ierr); \
220           ierr = PetscFree(b->j);CHKERRQ(ierr); \
221         } \
222         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
223         b->singlemalloc = PETSC_TRUE; \
224  \
225         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
226         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
227         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
228         b->maxnz += bs2*CHUNKSIZE; \
229         b->reallocs++; \
230         b->nz++; \
231       } \
232       N = nrow++ - 1;  \
233       /* shift up all the later entries in this row */ \
234       for (ii=N; ii>=_i; ii--) { \
235         rp[ii+1] = rp[ii]; \
236         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
237       } \
238       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
239       rp[_i]                      = bcol;  \
240       ap[bs2*_i + bs*cidx + ridx] = value;  \
241       b_noinsert:; \
242     bilen[brow] = nrow; \
243 }
244 #endif
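/*
   Descriptive note on the two macros above: with the local variables set up
   as in MatSetValues_MPISBAIJ_MatScalar() below, a single point insertion
   into the diagonal (A) or off-diagonal (B) block reduces to

     MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);

   Each macro bisects the block row until fewer than four candidate block
   columns remain, scans linearly for bcol, and on a miss either shifts the
   row and inserts in place or, if the row is full, reallocates the matrix
   storage with CHUNKSIZE (= 10) extra block slots for that row.
*/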
245 
246 #if defined(PETSC_USE_MAT_SINGLE)
247 #undef __FUNCT__
248 #define __FUNCT__ "MatSetValues_MPISBAIJ"
249 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
250 {
251   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
252   int          ierr,i,N = m*n;
253   MatScalar    *vsingle;
254 
255   PetscFunctionBegin;
256   if (N > b->setvalueslen) {
257     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
258     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
259     b->setvalueslen  = N;
260   }
261   vsingle = b->setvaluescopy;
262 
263   for (i=0; i<N; i++) {
264     vsingle[i] = v[i];
265   }
266   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
273 {
274   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
275   int          ierr,i,N = m*n*b->bs2;
276   MatScalar    *vsingle;
277 
278   PetscFunctionBegin;
279   if (N > b->setvalueslen) {
280     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
281     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
282     b->setvalueslen  = N;
283   }
284   vsingle = b->setvaluescopy;
285   for (i=0; i<N; i++) {
286     vsingle[i] = v[i];
287   }
288   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
295 {
296   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
297   int          ierr,i,N = m*n;
298   MatScalar    *vsingle;
299 
300   PetscFunctionBegin;
301   SETERRQ(1,"Function not yet written for SBAIJ format");
302   /* PetscFunctionReturn(0); */
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
308 {
309   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
310   int          ierr,i,N = m*n*b->bs2;
311   MatScalar    *vsingle;
312 
313   PetscFunctionBegin;
314   SETERRQ(1,"Function not yet written for SBAIJ format");
315   /* PetscFunctionReturn(0); */
316 }
317 #endif
318 
319 /* Only add/insert a(i,j) with i<=j (blocks).
320    Any a(i,j) with i>j input by the user is ignored.
321 */
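/*
   For example (illustrative, bs = 1): inserting the pair (0,1) and (1,0)
   keeps only the upper-triangular entry; the lower one is implied by
   symmetry and silently dropped.

     int         row0 = 0,col1 = 1,row1 = 1,col0 = 0;
     PetscScalar v = 5.0;
     ierr = MatSetValues(mat,1,&row0,1,&col1,&v,INSERT_VALUES);CHKERRQ(ierr);   stored as a(0,1)
     ierr = MatSetValues(mat,1,&row1,1,&col0,&v,INSERT_VALUES);CHKERRQ(ierr);   ignored
*/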
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
325 {
326   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
327   MatScalar    value;
328   PetscTruth   roworiented = baij->roworiented;
329   int          ierr,i,j,row,col;
330   int          rstart_orig=baij->rstart_bs;
331   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
332   int          cend_orig=baij->cend_bs,bs=baij->bs;
333 
334   /* Some Variables required in the macro */
335   Mat          A = baij->A;
336   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
337   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
338   MatScalar    *aa=a->a;
339 
340   Mat          B = baij->B;
341   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
342   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
343   MatScalar    *ba=b->a;
344 
345   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
346   int          low,high,t,ridx,cidx,bs2=a->bs2;
347   MatScalar    *ap,*bap;
348 
349   /* for stash */
350   int          n_loc, *in_loc=0;
351   MatScalar    *v_loc=0;
352 
353   PetscFunctionBegin;
354 
355   if(!baij->donotstash){
356     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
357     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
358   }
359 
360   for (i=0; i<m; i++) {
361     if (im[i] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
364 #endif
365     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
366       row = im[i] - rstart_orig;              /* local row index */
367       for (j=0; j<n; j++) {
368         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
369         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
370           col = in[j] - cstart_orig;          /* local col index */
371           brow = row/bs; bcol = col/bs;
372           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
373           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
374           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
375           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
376         } else if (in[j] < 0) continue;
377 #if defined(PETSC_USE_BOPT_g)
378         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
379 #endif
380         else {  /* off-diag entry (B) */
381           if (mat->was_assembled) {
382             if (!baij->colmap) {
383               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
384             }
385 #if defined (PETSC_USE_CTABLE)
386             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
387             col  = col - 1;
388 #else
389             col = baij->colmap[in[j]/bs] - 1;
390 #endif
391             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
395               B = baij->B;
396               b = (Mat_SeqBAIJ*)(B)->data;
397               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
398               ba=b->a;
399             } else col += in[j]%bs;
400           } else col = in[j];
401           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
402           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
403           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
404         }
405       }
406     } else {  /* off processor entry */
407       if (!baij->donotstash) {
408         n_loc = 0;
409         for (j=0; j<n; j++){
410           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
411           in_loc[n_loc] = in[j];
412           if (roworiented) {
413             v_loc[n_loc] = v[i*n+j];
414           } else {
415             v_loc[n_loc] = v[j*m+i];
416           }
417           n_loc++;
418         }
419         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
420       }
421     }
422   }
423 
424   if(!baij->donotstash){
425     ierr = PetscFree(in_loc);CHKERRQ(ierr);
426     ierr = PetscFree(v_loc);CHKERRQ(ierr);
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
433 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
434 {
435   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
436   const MatScalar *value;
437   MatScalar       *barray=baij->barray;
438   PetscTruth      roworiented = baij->roworiented;
439   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
440   int             rend=baij->rend,cstart=baij->cstart,stepval;
441   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
442 
443   PetscFunctionBegin;
444   if(!barray) {
445     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
446     baij->barray = barray;
447   }
448 
449   if (roworiented) {
450     stepval = (n-1)*bs;
451   } else {
452     stepval = (m-1)*bs;
453   }
454   for (i=0; i<m; i++) {
455     if (im[i] < 0) continue;
456 #if defined(PETSC_USE_BOPT_g)
457     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
458 #endif
459     if (im[i] >= rstart && im[i] < rend) {
460       row = im[i] - rstart;
461       for (j=0; j<n; j++) {
462         /* If NumCol = 1 then a copy is not required */
463         if ((roworiented) && (n == 1)) {
464           barray = (MatScalar*) v + i*bs2;
465         } else if((!roworiented) && (m == 1)) {
466           barray = (MatScalar*) v + j*bs2;
467         } else { /* Here a copy is required */
468           if (roworiented) {
469             value = v + i*(stepval+bs)*bs + j*bs;
470           } else {
471             value = v + j*(stepval+bs)*bs + i*bs;
472           }
473           for (ii=0; ii<bs; ii++,value+=stepval) {
474             for (jj=0; jj<bs; jj++) {
475               *barray++  = *value++;
476             }
477           }
478           barray -=bs2;
479         }
480 
481         if (in[j] >= cstart && in[j] < cend){
482           col  = in[j] - cstart;
483           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
484         }
485         else if (in[j] < 0) continue;
486 #if defined(PETSC_USE_BOPT_g)
487         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
488 #endif
489         else {
490           if (mat->was_assembled) {
491             if (!baij->colmap) {
492               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
493             }
494 
495 #if defined(PETSC_USE_BOPT_g)
496 #if defined (PETSC_USE_CTABLE)
497             { int data;
498               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
499               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
500             }
501 #else
502             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
503 #endif
504 #endif
505 #if defined (PETSC_USE_CTABLE)
506 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
507             col  = (col - 1)/bs;
508 #else
509             col = (baij->colmap[in[j]] - 1)/bs;
510 #endif
511             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
512               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
513               col =  in[j];
514             }
515           }
516           else col = in[j];
517           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
518         }
519       }
520     } else {
521       if (!baij->donotstash) {
522         if (roworiented) {
523           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
524         } else {
525           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
526         }
527       }
528     }
529   }
530   PetscFunctionReturn(0);
531 }
532 
533 #define HASH_KEY 0.6180339887
534 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
535 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
536 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
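/*
   Worked example (illustrative): HASH implements multiplicative hashing with
   the golden-ratio constant, bucket = (int)(size*frac(key*0.6180339887)).
   For size = 11 and key = 7: 7*0.6180339887 = 4.3262..., the fractional part
   is 0.3262..., and 11*0.3262... = 3.58..., so the bucket is 3.
*/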
537 #undef __FUNCT__
538 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
539 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
540 {
541   PetscFunctionBegin;
542   SETERRQ(1,"Function not yet written for SBAIJ format");
543   /* PetscFunctionReturn(0); */
544 }
545 
546 #undef __FUNCT__
547 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
548 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
549 {
550   PetscFunctionBegin;
551   SETERRQ(1,"Function not yet written for SBAIJ format");
552   /* PetscFunctionReturn(0); */
553 }
554 
555 #undef __FUNCT__
556 #define __FUNCT__ "MatGetValues_MPISBAIJ"
557 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
558 {
559   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
560   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
561   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
562 
563   PetscFunctionBegin;
564   for (i=0; i<m; i++) {
565     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
566     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
567     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
568       row = idxm[i] - bsrstart;
569       for (j=0; j<n; j++) {
570         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
571         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
572         if (idxn[j] >= bscstart && idxn[j] < bscend){
573           col = idxn[j] - bscstart;
574           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575         } else {
576           if (!baij->colmap) {
577             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
578           }
579 #if defined (PETSC_USE_CTABLE)
580           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
581           data --;
582 #else
583           data = baij->colmap[idxn[j]/bs]-1;
584 #endif
585           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
586           else {
587             col  = data + idxn[j]%bs;
588             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
589           }
590         }
591       }
592     } else {
593       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
594     }
595   }
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatNorm_MPISBAIJ"
601 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
602 {
603   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
604   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
605   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
606   int        ierr;
607   PetscReal  sum[2],*lnorm2;
608 
609   PetscFunctionBegin;
610   if (baij->size == 1) {
611     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
612   } else {
613     if (type == NORM_FROBENIUS) {
614       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
615       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
616       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
617       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
618       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
619       /*
620       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
621       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
622       */
623       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
624       /*
625       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
626       PetscSynchronizedFlush(mat->comm); */
627 
628       *norm = sqrt(sum[0] + 2*sum[1]);
629       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
630     } else {
631       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
632     }
633   }
634   PetscFunctionReturn(0);
635 }
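/*
   Descriptive note on the Frobenius reduction above: each process
   contributes ||A_p||_F^2 for its diagonal block and ||B_p||_F^2 for its
   off-diagonal block. Since only one triangle of the off-diagonal coupling
   is stored, every B entry appears twice in the global symmetric matrix
   (as b_ij and b_ji), hence

     norm = sqrt( sum_p ||A_p||_F^2 + 2*sum_p ||B_p||_F^2 )
*/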
636 
637 /*
638   Creates and fills the hash table.
639   This table is created only once.
640   If new entries need to be added to the matrix
641   then the hash table has to be destroyed and
642   recreated.
643 */
644 #undef __FUNCT__
645 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
646 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
647 {
648   PetscFunctionBegin;
649   SETERRQ(1,"Function not yet written for SBAIJ format");
650   /* PetscFunctionReturn(0); */
651 }
652 
653 #undef __FUNCT__
654 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
655 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
656 {
657   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
658   int         ierr,nstash,reallocs;
659   InsertMode  addv;
660 
661   PetscFunctionBegin;
662   if (baij->donotstash) {
663     PetscFunctionReturn(0);
664   }
665 
666   /* make sure all processors are either in INSERTMODE or ADDMODE */
667   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
668   if (addv == (ADD_VALUES|INSERT_VALUES)) {
669     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
670   }
671   mat->insertmode = addv; /* in case this processor had no cache */
672 
673   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
674   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
675   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
676   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
677   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
678   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
679   PetscFunctionReturn(0);
680 }
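/*
   Descriptive note on the MPI_BOR reduction above: INSERT_VALUES and
   ADD_VALUES are distinct bits (1 and 2), so OR-ing the insert modes of all
   ranks yields ADD_VALUES|INSERT_VALUES exactly when some ranks inserted
   while others added; a rank with no cached values (mode 0) leaves the
   reduction unchanged.
*/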
681 
682 #undef __FUNCT__
683 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
684 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
685 {
686   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
687   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
688   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
689   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
690   int         *row,*col,other_disassembled;
691   PetscTruth  r1,r2,r3;
692   MatScalar   *val;
693   InsertMode  addv = mat->insertmode;
694 
695   PetscFunctionBegin;
696 
697   if (!baij->donotstash) {
698     while (1) {
699       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
700       /*
701       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
702       PetscSynchronizedFlush(mat->comm);
703       */
704       if (!flg) break;
705 
706       for (i=0; i<n;) {
707         /* Now identify the consecutive vals belonging to the same row */
708         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
709         if (j < n) ncols = j-i;
710         else       ncols = n-i;
711         /* Now assemble all these values with a single function call */
712         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
713         i = j;
714       }
715     }
716     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
717     /* Now process the block-stash. Since the values are stashed column-oriented,
718        temporarily clear the roworiented flags, and after MatSetValues()
719        restore the original flags */
720     r1 = baij->roworiented;
721     r2 = a->roworiented;
722     r3 = b->roworiented;
723     baij->roworiented = PETSC_FALSE;
724     a->roworiented    = PETSC_FALSE;
725     b->roworiented    = PETSC_FALSE;
726     while (1) {
727       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
728       if (!flg) break;
729 
730       for (i=0; i<n;) {
731         /* Now identify the consecutive vals belonging to the same row */
732         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
733         if (j < n) ncols = j-i;
734         else       ncols = n-i;
735         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
736         i = j;
737       }
738     }
739     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
740     baij->roworiented = r1;
741     a->roworiented    = r2;
742     b->roworiented    = r3;
743   }
744 
745   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
746   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
747 
748   /* determine if any processor has disassembled, if so we must
749      also disassemble ourselves, in order that we may reassemble. */
750   /*
751      if nonzero structure of submatrix B cannot change then we know that
752      no processor disassembled thus we can skip this stuff
753   */
754   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
755     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
756     if (mat->was_assembled && !other_disassembled) {
757       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
758     }
759   }
760 
761   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
762     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
763   }
764   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
765   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
766 
767 #if defined(PETSC_USE_BOPT_g)
768   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
769     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
770     baij->ht_total_ct  = 0;
771     baij->ht_insert_ct = 0;
772   }
773 #endif
774   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
775     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
776     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
777     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
778   }
779 
780   if (baij->rowvalues) {
781     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
782     baij->rowvalues = 0;
783   }
784 
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
790 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
791 {
792   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
793   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
794   PetscTruth        isascii,isdraw;
795   PetscViewer       sviewer;
796   PetscViewerFormat format;
797 
798   PetscFunctionBegin;
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
800   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
801   if (isascii) {
802     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
803     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
804       MatInfo info;
805       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
806       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
807       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
808               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
809               baij->bs,(int)info.memory);CHKERRQ(ierr);
810       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
811       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
812       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
813       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
814       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
815       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
816       PetscFunctionReturn(0);
817     } else if (format == PETSC_VIEWER_ASCII_INFO) {
818       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
819       PetscFunctionReturn(0);
820     }
821   }
822 
823   if (isdraw) {
824     PetscDraw       draw;
825     PetscTruth isnull;
826     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
827     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
828   }
829 
830   if (size == 1) {
831     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
832     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
833   } else {
834     /* assemble the entire matrix onto first processor. */
835     Mat         A;
836     Mat_SeqSBAIJ *Aloc;
837     Mat_SeqBAIJ *Bloc;
838     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
839     MatScalar   *a;
840 
841     if (!rank) {
842       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
843     } else {
844       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
845     }
846     PetscLogObjectParent(mat,A);
847 
848     /* copy over the A part */
849     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
850     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
851     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
852 
853     for (i=0; i<mbs; i++) {
854       rvals[0] = bs*(baij->rstart + i);
855       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
856       for (j=ai[i]; j<ai[i+1]; j++) {
857         col = (baij->cstart+aj[j])*bs;
858         for (k=0; k<bs; k++) {
859           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
860           col++; a += bs;
861         }
862       }
863     }
864     /* copy over the B part */
865     Bloc = (Mat_SeqBAIJ*)baij->B->data;
866     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
867     for (i=0; i<mbs; i++) {
868       rvals[0] = bs*(baij->rstart + i);
869       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
870       for (j=ai[i]; j<ai[i+1]; j++) {
871         col = baij->garray[aj[j]]*bs;
872         for (k=0; k<bs; k++) {
873           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
874           col++; a += bs;
875         }
876       }
877     }
878     ierr = PetscFree(rvals);CHKERRQ(ierr);
879     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
880     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
881     /*
882        Everyone has to call to draw the matrix since the graphics waits are
883        synchronized across all processors that share the PetscDraw object
884     */
885     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
886     if (!rank) {
887       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
888       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
889     }
890     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
891     ierr = MatDestroy(A);CHKERRQ(ierr);
892   }
893   PetscFunctionReturn(0);
894 }
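/*
   Note on the parallel branch above (descriptive): for size > 1 the entire
   matrix is funneled to process 0 by re-inserting all entries into a
   temporary MPISBAIJ matrix that lives wholly on rank 0, which then views
   its local diagonal block through the singleton viewer. This is
   memory-intensive, so it is suitable only for debugging-scale matrices.
*/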
895 
896 #undef __FUNCT__
897 #define __FUNCT__ "MatView_MPISBAIJ"
898 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
899 {
900   int        ierr;
901   PetscTruth isascii,isdraw,issocket,isbinary;
902 
903   PetscFunctionBegin;
904   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
905   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
906   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
907   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
908   if (isascii || isdraw || issocket || isbinary) {
909     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
910   } else {
911     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
912   }
913   PetscFunctionReturn(0);
914 }
915 
916 #undef __FUNCT__
917 #define __FUNCT__ "MatDestroy_MPISBAIJ"
918 int MatDestroy_MPISBAIJ(Mat mat)
919 {
920   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
921   int         ierr;
922 
923   PetscFunctionBegin;
924 #if defined(PETSC_USE_LOG)
925   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
926 #endif
927   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
928   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
929   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
930   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
931   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
932 #if defined (PETSC_USE_CTABLE)
933   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
934 #else
935   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
936 #endif
937   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
938   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
939   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
940   if (baij->slvec0) {
941     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
942     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
943   }
944   if (baij->slvec1) {
945     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
946     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
947     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
948   }
949   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
950   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
951   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
952   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
953 #if defined(PETSC_USE_MAT_SINGLE)
954   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
955 #endif
956   ierr = PetscFree(baij);CHKERRQ(ierr);
957   PetscFunctionReturn(0);
958 }
959 
960 #undef __FUNCT__
961 #define __FUNCT__ "MatMult_MPISBAIJ"
962 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
963 {
964   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
965   int         ierr,nt,mbs=a->mbs,bs=a->bs;
966   PetscScalar *x,*from,zero=0.0;
967 
968   PetscFunctionBegin;
969   /*
970   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
971   PetscSynchronizedFlush(A->comm);
972   */
973   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
974   if (nt != A->n) {
975     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
976   }
977   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
978   if (nt != A->m) {
979     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
980   }
981 
982   /* diagonal part */
983   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
984   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
985 
986   /* subdiagonal part */
987   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
988 
989   /* copy x into the vec slvec0 */
990   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
991   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
992   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
993   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
994 
995   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
996   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
997   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
998 
999   /* superdiagonal part */
1000   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1001 
1002   PetscFunctionReturn(0);
1003 }
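/*
   Descriptive sketch of the scheme above: split the global symmetric matrix
   as M = D + U + U^T, with D the block-diagonal part (stored in a->A) and U
   the strictly upper off-diagonal part (stored in a->B). Then

     y = D*x + U*x_ghost + U^T*x_local

   slvec0 packs [x_local | U^T*x_local], and the single forward scatter with
   ADD_VALUES both ships the U^T contributions to their owning processes and
   collects the ghost values of x needed for U*x, so one communication phase
   suffices (the _2comm variant below needs two).
*/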
1004 
1005 #undef __FUNCT__
1006 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1007 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1008 {
1009   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1010   int         ierr,nt;
1011 
1012   PetscFunctionBegin;
1013   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1014   if (nt != A->n) {
1015     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1016   }
1017   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1018   if (nt != A->m) {
1019     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1020   }
1021 
1022   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1023   /* do diagonal part */
1024   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1025   /* do superdiagonal part */
1026   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1027   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1028   /* do subdiagonal part */
1029   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1030   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1031   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1032 
1033   PetscFunctionReturn(0);
1034 }
1035 
1036 #undef __FUNCT__
1037 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1038 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1039 {
1040   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1041   int          ierr,mbs=a->mbs,bs=a->bs;
1042   PetscScalar  *x,*from,zero=0.0;
1043 
1044   PetscFunctionBegin;
1045   /*
1046   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1047   PetscSynchronizedFlush(A->comm);
1048   */
1049   /* diagonal part */
1050   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1051   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1052 
1053   /* subdiagonal part */
1054   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1055 
1056   /* copy x into the vec slvec0 */
1057   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1058   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1059   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1060   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1061 
1062   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1063   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1064   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1065 
1066   /* superdiagonal part */
1067   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1068 
1069   PetscFunctionReturn(0);
1070 }
1071 
1072 #undef __FUNCT__
1073 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1074 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1075 {
1076   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1077   int        ierr;
1078 
1079   PetscFunctionBegin;
1080   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1081   /* do diagonal part */
1082   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1083   /* do superdiagonal part */
1084   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1085   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1086 
1087   /* do subdiagonal part */
1088   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1089   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1090   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1091 
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1097 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1098 {
1099   PetscFunctionBegin;
1100   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1101   /* PetscFunctionReturn(0); */
1102 }
1103 
1104 #undef __FUNCT__
1105 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1106 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1107 {
1108   PetscFunctionBegin;
1109   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1110   /* PetscFunctionReturn(0); */
1111 }
1112 
1113 /*
1114   This only works correctly for square matrices where the subblock A->A is the
1115    diagonal block
1116 */
1117 #undef __FUNCT__
1118 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1119 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1120 {
1121   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1122   int         ierr;
1123 
1124   PetscFunctionBegin;
1125   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1126   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1127   PetscFunctionReturn(0);
1128 }
1129 
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatScale_MPISBAIJ"
1132 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1133 {
1134   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1135   int         ierr;
1136 
1137   PetscFunctionBegin;
1138   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1139   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1140   PetscFunctionReturn(0);
1141 }
1142 
1143 #undef __FUNCT__
1144 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1145 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1146 {
1147   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1148   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1149   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1150   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1151   int            *cmap,*idx_p,cstart = mat->cstart;
1152 
1153   PetscFunctionBegin;
1154   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1155   mat->getrowactive = PETSC_TRUE;
1156 
1157   if (!mat->rowvalues && (idx || v)) {
1158     /*
1159         allocate enough space to hold information from the longest row.
1160     */
1161     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1162     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1163     int     max = 1,mbs = mat->mbs,tmp;
1164     for (i=0; i<mbs; i++) {
1165       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1166       if (max < tmp) { max = tmp; }
1167     }
1168     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1169     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1170   }
1171 
1172   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1173   lrow = row - brstart;  /* local row index */
1174 
1175   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1176   if (!v)   {pvA = 0; pvB = 0;}
1177   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1178   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1179   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1180   nztot = nzA + nzB;
1181 
1182   cmap  = mat->garray;
1183   if (v  || idx) {
1184     if (nztot) {
1185       /* Sort by increasing column numbers, assuming A and B already sorted */
1186       int imark = -1;
1187       if (v) {
1188         *v = v_p = mat->rowvalues;
1189         for (i=0; i<nzB; i++) {
1190           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1191           else break;
1192         }
1193         imark = i;
1194         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1195         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1196       }
1197       if (idx) {
1198         *idx = idx_p = mat->rowindices;
1199         if (imark > -1) {
1200           for (i=0; i<imark; i++) {
1201             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1202           }
1203         } else {
1204           for (i=0; i<nzB; i++) {
1205             if (cmap[cworkB[i]/bs] < cstart)
1206               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1207             else break;
1208           }
1209           imark = i;
1210         }
1211         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1212         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1213       }
1214     } else {
1215       if (idx) *idx = 0;
1216       if (v)   *v   = 0;
1217     }
1218   }
1219   *nz = nztot;
1220   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1221   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1222   PetscFunctionReturn(0);
1223 }
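/*
   Descriptive note on the merge above: a local row is returned as
   [ B columns left of the diagonal block | A columns | remaining B columns ],
   which is sorted by global column because A and B are each kept sorted and
   cmap (= garray) maps B's compressed block columns back to global block
   numbers.
*/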
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1227 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1228 {
1229   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1230 
1231   PetscFunctionBegin;
1232   if (baij->getrowactive == PETSC_FALSE) {
1233     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1234   }
1235   baij->getrowactive = PETSC_FALSE;
1236   PetscFunctionReturn(0);
1237 }
1238 
1239 #undef __FUNCT__
1240 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1241 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1242 {
1243   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1244 
1245   PetscFunctionBegin;
1246   *bs = baij->bs;
1247   PetscFunctionReturn(0);
1248 }
1249 
1250 #undef __FUNCT__
1251 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1252 int MatZeroEntries_MPISBAIJ(Mat A)
1253 {
1254   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1255   int         ierr;
1256 
1257   PetscFunctionBegin;
1258   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1259   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1265 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1266 {
1267   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1268   Mat         A = a->A,B = a->B;
1269   int         ierr;
1270   PetscReal   isend[5],irecv[5];
1271 
1272   PetscFunctionBegin;
1273   info->block_size     = (PetscReal)a->bs;
1274   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1275   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1276   isend[3] = info->memory;  isend[4] = info->mallocs;
1277   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1278   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1279   isend[3] += info->memory;  isend[4] += info->mallocs;
1280   if (flag == MAT_LOCAL) {
1281     info->nz_used      = isend[0];
1282     info->nz_allocated = isend[1];
1283     info->nz_unneeded  = isend[2];
1284     info->memory       = isend[3];
1285     info->mallocs      = isend[4];
1286   } else if (flag == MAT_GLOBAL_MAX) {
1287     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1288     info->nz_used      = irecv[0];
1289     info->nz_allocated = irecv[1];
1290     info->nz_unneeded  = irecv[2];
1291     info->memory       = irecv[3];
1292     info->mallocs      = irecv[4];
1293   } else if (flag == MAT_GLOBAL_SUM) {
1294     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1295     info->nz_used      = irecv[0];
1296     info->nz_allocated = irecv[1];
1297     info->nz_unneeded  = irecv[2];
1298     info->memory       = irecv[3];
1299     info->mallocs      = irecv[4];
1300   } else {
1301     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1302   }
1303   info->rows_global       = (PetscReal)matin->M;
1304   info->columns_global    = (PetscReal)matin->N;
1305   info->rows_local        = (PetscReal)matin->m;
1306   info->columns_local     = (PetscReal)matin->n;
1307   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1308   info->fill_ratio_needed = 0;
1309   info->factor_mallocs    = 0;
1310   PetscFunctionReturn(0);
1311 }
1312 
1313 #undef __FUNCT__
1314 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1315 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1316 {
1317   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1318   int         ierr;
1319 
1320   PetscFunctionBegin;
1321   switch (op) {
1322   case MAT_NO_NEW_NONZERO_LOCATIONS:
1323   case MAT_YES_NEW_NONZERO_LOCATIONS:
1324   case MAT_COLUMNS_UNSORTED:
1325   case MAT_COLUMNS_SORTED:
1326   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1327   case MAT_KEEP_ZEROED_ROWS:
1328   case MAT_NEW_NONZERO_LOCATION_ERR:
1329     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1330     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1331     break;
1332   case MAT_ROW_ORIENTED:
1333     a->roworiented = PETSC_TRUE;
1334     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1335     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1336     break;
1337   case MAT_ROWS_SORTED:
1338   case MAT_ROWS_UNSORTED:
1339   case MAT_YES_NEW_DIAGONALS:
1340     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1341     break;
1342   case MAT_COLUMN_ORIENTED:
1343     a->roworiented = PETSC_FALSE;
1344     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1345     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1346     break;
1347   case MAT_IGNORE_OFF_PROC_ENTRIES:
1348     a->donotstash = PETSC_TRUE;
1349     break;
1350   case MAT_NO_NEW_DIAGONALS:
1351     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1352   case MAT_USE_HASH_TABLE:
1353     a->ht_flag = PETSC_TRUE;
1354     break;
1355   case MAT_SYMMETRIC:
1356   case MAT_STRUCTURALLY_SYMMETRIC:
1357     break;
1358   default:
1359     SETERRQ(PETSC_ERR_SUP,"unknown option");
1360   }
1361   PetscFunctionReturn(0);
1362 }
1363 
1364 #undef __FUNCT__
1365 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1366 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1367 {
1368   int ierr;
1369   PetscFunctionBegin;
1370   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1371   PetscFunctionReturn(0);
1372 }
1373 
1374 #undef __FUNCT__
1375 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1376 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1377 {
1378   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1379   Mat         a = baij->A,b = baij->B;
1380   int         ierr,s1,s2,s3;
1381 
1382   PetscFunctionBegin;
1383   if (ll != rr) {
1384     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1385   }
1386   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1387   if (rr) {
1388     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1389     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1390     /* Overlap communication with computation. */
1391     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1392     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1393     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1394     /* left scale the off-diagonal block (ll == rr is guaranteed above) */
1395     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1396     /* scale the diagonal block */
1397     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1398 
1399     /* finish the scatter, then right scale the off-diagonal block */
1400     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1401     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1404   }
1405 
1406   PetscFunctionReturn(0);
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1411 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1412 {
1413   PetscFunctionBegin;
1414   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1415 }
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1419 int MatPrintHelp_MPISBAIJ(Mat A)
1420 {
1421   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1422   MPI_Comm    comm = A->comm;
1423   static int  called = 0;
1424   int         ierr;
1425 
1426   PetscFunctionBegin;
1427   if (!a->rank) {
1428     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1429   }
1430   if (called) {PetscFunctionReturn(0);} else called = 1;
1431   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1432   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use a hash table for efficient matrix assembly\n");CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1438 int MatSetUnfactored_MPISBAIJ(Mat A)
1439 {
1440   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1441   int         ierr;
1442 
1443   PetscFunctionBegin;
1444   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1445   PetscFunctionReturn(0);
1446 }
1447 
1448 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1449 
1450 #undef __FUNCT__
1451 #define __FUNCT__ "MatEqual_MPISBAIJ"
1452 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1453 {
1454   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1455   Mat         a,b,c,d;
1456   PetscTruth  flg;
1457   int         ierr;
1458 
1459   PetscFunctionBegin;
1460   a = matA->A; b = matA->B;
1461   c = matB->A; d = matB->B;
1462 
1463   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1464   if (flg == PETSC_TRUE) {
1465     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1466   }
1467   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1468   PetscFunctionReturn(0);
1469 }
1470 
1471 #undef __FUNCT__
1472 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1473 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1474 {
1475   int        ierr;
1476 
1477   PetscFunctionBegin;
1478   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 /* -------------------------------------------------------------------*/
1482 static struct _MatOps MatOps_Values = {
1483        MatSetValues_MPISBAIJ,
1484        MatGetRow_MPISBAIJ,
1485        MatRestoreRow_MPISBAIJ,
1486        MatMult_MPISBAIJ,
1487 /* 4*/ MatMultAdd_MPISBAIJ,
1488        MatMultTranspose_MPISBAIJ,
1489        MatMultTransposeAdd_MPISBAIJ,
1490        0,
1491        0,
1492        0,
1493 /*10*/ 0,
1494        0,
1495        0,
1496        MatRelax_MPISBAIJ,
1497        MatTranspose_MPISBAIJ,
1498 /*15*/ MatGetInfo_MPISBAIJ,
1499        MatEqual_MPISBAIJ,
1500        MatGetDiagonal_MPISBAIJ,
1501        MatDiagonalScale_MPISBAIJ,
1502        MatNorm_MPISBAIJ,
1503 /*20*/ MatAssemblyBegin_MPISBAIJ,
1504        MatAssemblyEnd_MPISBAIJ,
1505        0,
1506        MatSetOption_MPISBAIJ,
1507        MatZeroEntries_MPISBAIJ,
1508 /*25*/ MatZeroRows_MPISBAIJ,
1509        0,
1510        0,
1511        0,
1512        0,
1513 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*35*/ MatDuplicate_MPISBAIJ,
1519        0,
1520        0,
1521        0,
1522        0,
1523 /*40*/ 0,
1524        0,
1525        0,
1526        MatGetValues_MPISBAIJ,
1527        0,
1528 /*45*/ MatPrintHelp_MPISBAIJ,
1529        MatScale_MPISBAIJ,
1530        0,
1531        0,
1532        0,
1533 /*50*/ MatGetBlockSize_MPISBAIJ,
1534        0,
1535        0,
1536        0,
1537        0,
1538 /*55*/ 0,
1539        0,
1540        MatSetUnfactored_MPISBAIJ,
1541        0,
1542        MatSetValuesBlocked_MPISBAIJ,
1543 /*60*/ 0,
1544        0,
1545        0,
1546        MatGetPetscMaps_Petsc,
1547        0,
1548 /*65*/ 0,
1549        0,
1550        0,
1551        0,
1552        0,
1553 /*70*/ MatGetRowMax_MPISBAIJ,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*75*/ 0,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*80*/ 0,
1564        0,
1565        0,
1566        0,
1567        0,
1568 /*85*/ MatLoad_MPISBAIJ
1569 };
1570 
1571 
1572 EXTERN_C_BEGIN
1573 #undef __FUNCT__
1574 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1575 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1576 {
1577   PetscFunctionBegin;
1578   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1579   *iscopy = PETSC_FALSE;
1580   PetscFunctionReturn(0);
1581 }
1582 EXTERN_C_END
1583 
1584 EXTERN_C_BEGIN
1585 #undef __FUNCT__
1586 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1587 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1588 {
1589   Mat_MPISBAIJ *b;
1590   int          ierr,i,mbs,Mbs;
1591 
1592   PetscFunctionBegin;
1593   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1594 
1595   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1596   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1597   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1598   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1599   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1600   if (d_nnz) {
1601     for (i=0; i<B->m/bs; i++) {
1602       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1603     }
1604   }
1605   if (o_nnz) {
1606     for (i=0; i<B->m/bs; i++) {
1607       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1608     }
1609   }
1610   B->preallocated = PETSC_TRUE;
1611   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1612   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1613   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1614   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1615 
1616   b   = (Mat_MPISBAIJ*)B->data;
1617   mbs = B->m/bs;
1618   Mbs = B->M/bs;
1619   if (mbs*bs != B->m) {
1620     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by blocksize %d",B->m,bs);
1621   }
1622 
1623   b->bs  = bs;
1624   b->bs2 = bs*bs;
1625   b->mbs = mbs;
1626   b->nbs = mbs;
1627   b->Mbs = Mbs;
1628   b->Nbs = Mbs;
1629 
1630   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1631   b->rowners[0]    = 0;
1632   for (i=2; i<=b->size; i++) {
1633     b->rowners[i] += b->rowners[i-1];
1634   }
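  /* rowners[] now holds the prefix sum of the per-process block-row counts,
     e.g. (illustrative) local mbs of {4,3,3} gives rowners = {0,4,7,10} */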
1635   b->rstart    = b->rowners[b->rank];
1636   b->rend      = b->rowners[b->rank+1];
1637   b->cstart    = b->rstart;
1638   b->cend      = b->rend;
1639   for (i=0; i<=b->size; i++) {
1640     b->rowners_bs[i] = b->rowners[i]*bs;
1641   }
1642   b->rstart_bs = b->rstart*bs;
1643   b->rend_bs   = b->rend*bs;
1644 
1645   b->cstart_bs = b->cstart*bs;
1646   b->cend_bs   = b->cend*bs;
1647 
1648 
1649   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1650   PetscLogObjectParent(B,b->A);
1651   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1652   PetscLogObjectParent(B,b->B);
1653 
1654   /* build cache for off array entries formed */
1655   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1656 
1657   PetscFunctionReturn(0);
1658 }
1659 EXTERN_C_END
1660 
1661 /*MC
1662    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1663    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1664 
1665    Options Database Keys:
1666 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1667 
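   Example usage (a minimal sketch; comm and the sizes m,n,M,N are assumed
   already defined, error checking omitted):
.vb
   Mat A;
   MatCreate(comm,m,n,M,N,&A);
   MatSetFromOptions(A);     /* picks up -mat_type mpisbaij from the options database */
.ve
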
1668   Level: beginner
1669 
1670 .seealso: MatCreateMPISBAIJ()
1671 M*/
1672 
1673 EXTERN_C_BEGIN
1674 #undef __FUNCT__
1675 #define __FUNCT__ "MatCreate_MPISBAIJ"
1676 int MatCreate_MPISBAIJ(Mat B)
1677 {
1678   Mat_MPISBAIJ *b;
1679   int          ierr;
1680   PetscTruth   flg;
1681 
1682   PetscFunctionBegin;
1683 
1684   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1685   B->data = (void*)b;
1686   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1687   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1688 
1689   B->ops->destroy    = MatDestroy_MPISBAIJ;
1690   B->ops->view       = MatView_MPISBAIJ;
1691   B->mapping    = 0;
1692   B->factor     = 0;
1693   B->assembled  = PETSC_FALSE;
1694 
1695   B->insertmode = NOT_SET_VALUES;
1696   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1697   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1698 
1699   /* build local table of row and column ownerships */
1700   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1701   b->cowners    = b->rowners + b->size + 2;
1702   b->rowners_bs = b->cowners + b->size + 2;
1703   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1704 
1705   /* build cache for off array entries formed */
1706   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1707   b->donotstash  = PETSC_FALSE;
1708   b->colmap      = PETSC_NULL;
1709   b->garray      = PETSC_NULL;
1710   b->roworiented = PETSC_TRUE;
1711 
1712 #if defined(PETSC_USE_MAT_SINGLE)
1713   /* stuff for MatSetValues_XXX in single precision */
1714   b->setvalueslen     = 0;
1715   b->setvaluescopy    = PETSC_NULL;
1716 #endif
1717 
1718   /* stuff used in block assembly */
1719   b->barray       = 0;
1720 
1721   /* stuff used for matrix vector multiply */
1722   b->lvec         = 0;
1723   b->Mvctx        = 0;
1724   b->slvec0       = 0;
1725   b->slvec0b      = 0;
1726   b->slvec1       = 0;
1727   b->slvec1a      = 0;
1728   b->slvec1b      = 0;
1729   b->sMvctx       = 0;
1730 
1731   /* stuff for MatGetRow() */
1732   b->rowindices   = 0;
1733   b->rowvalues    = 0;
1734   b->getrowactive = PETSC_FALSE;
1735 
1736   /* hash table stuff */
1737   b->ht           = 0;
1738   b->hd           = 0;
1739   b->ht_size      = 0;
1740   b->ht_flag      = PETSC_FALSE;
1741   b->ht_fact      = 0;
1742   b->ht_total_ct  = 0;
1743   b->ht_insert_ct = 0;
1744 
1745   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1746   if (flg) {
1747     PetscReal fact = 1.39;
1748     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1749     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1750     if (fact <= 1.0) fact = 1.39;
1751     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1752     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1753   }
1754   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1755                                      "MatStoreValues_MPISBAIJ",
1756                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1757   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1758                                      "MatRetrieveValues_MPISBAIJ",
1759                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1760   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1761                                      "MatGetDiagonalBlock_MPISBAIJ",
1762                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1763   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1764                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1765                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1766   PetscFunctionReturn(0);
1767 }
1768 EXTERN_C_END
1769 
1770 /*MC
1771    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1772 
1773    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1774    and MATMPISBAIJ otherwise.
1775 
1776    Options Database Keys:
1777 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1778 
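   For example, assuming ./ex is an executable that calls MatSetFromOptions()
   on its matrix, run with:
.vb
   mpirun -np 4 ./ex -mat_type sbaij
.ve
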
1779   Level: beginner
1780 
1781 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
1782 M*/
1783 
1784 EXTERN_C_BEGIN
1785 #undef __FUNCT__
1786 #define __FUNCT__ "MatCreate_SBAIJ"
1787 int MatCreate_SBAIJ(Mat A) {
1788   int ierr,size;
1789 
1790   PetscFunctionBegin;
1791   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1792   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1793   if (size == 1) {
1794     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1795   } else {
1796     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1797   }
1798   PetscFunctionReturn(0);
1799 }
1800 EXTERN_C_END
1801 
1802 #undef __FUNCT__
1803 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1804 /*@C
1805    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1806    the user should preallocate the matrix storage by setting the parameters
1807    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1808    performance can be increased by more than a factor of 50.
1809 
1810    Collective on Mat
1811 
1812    Input Parameters:
1813 +  A - the matrix
1814 .  bs   - size of block
1815 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1816            submatrix  (same for all local rows)
1817 .  d_nnz - array containing the number of block nonzeros in the various block rows
1818            in the upper triangular and diagonal part of the diagonal portion of the
1819            local submatrix (possibly different for each block row) or PETSC_NULL.
1820            You must leave room for the diagonal entry even if it is zero.
1821 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1822            submatrix (same for all local rows).
1823 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1824            off-diagonal portion of the local submatrix (possibly different for
1825            each block row) or PETSC_NULL.
1826 
1827 
1828    Options Database Keys:
1829 .   -mat_no_unroll - uses code that does not unroll the loops in the
1830                      block calculations (much slower)
1831 .   -mat_block_size - size of the blocks to use
1832 
1833    Notes:
1834 
1835    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1836    then it must be used on all processors that share the object for that argument.
1837 
1838    Storage Information:
1839    For a square global matrix we define each processor's diagonal portion
1840    to be its local rows and the corresponding columns (a square submatrix);
1841    each processor's off-diagonal portion encompasses the remainder of the
1842    local matrix (a rectangular submatrix).
1843 
1844    The user can specify preallocated storage for the diagonal part of
1845    the local submatrix with either d_nz or d_nnz (not both).  Set
1846    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1847    memory allocation.  Likewise, specify preallocated storage for the
1848    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1849 
1850    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1851    the figure below we depict these three local rows and all columns (0-11).
1852 
1853 .vb
1854            0 1 2 3 4 5 6 7 8 9 10 11
1855           -------------------
1856    row 3  |  o o o d d d o o o o o o
1857    row 4  |  o o o d d d o o o o o o
1858    row 5  |  o o o d d d o o o o o o
1859           -------------------
1860 .ve
1861 
1862    Thus, any entries in the d locations are stored in the d (diagonal)
1863    submatrix, and any entries in the o locations are stored in the
1864    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1865    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1866 
1867    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1868    plus the diagonal part of the d matrix,
1869    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1870    In general, for PDE problems in which most nonzeros are near the diagonal,
1871    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1872    or you will get TERRIBLE performance; see the users' manual chapter on
1873    matrices.
1874 
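   Example (a sketch following the storage rules above, with illustrative values
   bs=2, d_nz=5, o_nz=2 and no per-row arrays; error checking omitted):
.vb
   Mat A;
   MatCreate(comm,m,n,M,N,&A);
   MatSetType(A,MATMPISBAIJ);
   MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);
.ve
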
1875    Level: intermediate
1876 
1877 .keywords: matrix, block, aij, compressed row, sparse, parallel
1878 
1879 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1880 @*/
1881 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1882 {
1883   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1884 
1885   PetscFunctionBegin;
1886   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1887   if (f) {
1888     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1889   }
1890   PetscFunctionReturn(0);
1891 }
1892 
1893 #undef __FUNCT__
1894 #define __FUNCT__ "MatCreateMPISBAIJ"
1895 /*@C
1896    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1897    (block compressed row).  For good matrix assembly performance
1898    the user should preallocate the matrix storage by setting the parameters
1899    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1900    performance can be increased by more than a factor of 50.
1901 
1902    Collective on MPI_Comm
1903 
1904    Input Parameters:
1905 +  comm - MPI communicator
1906 .  bs   - size of block
1907 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1908            This value should be the same as the local size used in creating the
1909            y vector for the matrix-vector product y = Ax.
1910 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1911            This value should be the same as the local size used in creating the
1912            x vector for the matrix-vector product y = Ax.
1913 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1914 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1915 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1916            submatrix  (same for all local rows)
1917 .  d_nnz - array containing the number of block nonzeros in the various block rows
1918            in the upper triangular portion of the diagonal portion of the local submatrix
1919            (possibly different for each block row) or PETSC_NULL.
1920            You must leave room for the diagonal entry even if it is zero.
1921 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1922            submatrix (same for all local rows).
1923 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1924            off-diagonal portion of the local submatrix (possibly different for
1925            each block row) or PETSC_NULL.
1926 
1927    Output Parameter:
1928 .  A - the matrix
1929 
1930    Options Database Keys:
1931 .   -mat_no_unroll - uses code that does not unroll the loops in the
1932                      block calculations (much slower)
1933 .   -mat_block_size - size of the blocks to use
1934 .   -mat_mpi - use the parallel matrix data structures even on one processor
1935                (defaults to using SeqSBAIJ format on one processor)
1936 
1937    Notes:
1938    The user MUST specify either the local or global matrix dimensions
1939    (possibly both).
1940 
1941    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1942    then it must be used on all processors that share the object for that argument.
1943 
1944    Storage Information:
1945    For a square global matrix we define each processor's diagonal portion
1946    to be its local rows and the corresponding columns (a square submatrix);
1947    each processor's off-diagonal portion encompasses the remainder of the
1948    local matrix (a rectangular submatrix).
1949 
1950    The user can specify preallocated storage for the diagonal part of
1951    the local submatrix with either d_nz or d_nnz (not both).  Set
1952    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1953    memory allocation.  Likewise, specify preallocated storage for the
1954    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1955 
1956    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1957    the figure below we depict these three local rows and all columns (0-11).
1958 
1959 .vb
1960            0 1 2 3 4 5 6 7 8 9 10 11
1961           -------------------
1962    row 3  |  o o o d d d o o o o o o
1963    row 4  |  o o o d d d o o o o o o
1964    row 5  |  o o o d d d o o o o o o
1965           -------------------
1966 .ve
1967 
1968    Thus, any entries in the d locations are stored in the d (diagonal)
1969    submatrix, and any entries in the o locations are stored in the
1970    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1971    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1972 
1973    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1974    plus the diagonal part of the d matrix,
1975    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1976    In general, for PDE problems in which most nonzeros are near the diagonal,
1977    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1978    or you will get TERRIBLE performance; see the users' manual chapter on
1979    matrices.
1980 
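   Example (a sketch with illustrative values: block size 2, square global size M,
   local sizes left to PETSc, uniform preallocation):
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,M,M,
                            5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
.ve
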
1981    Level: intermediate
1982 
1983 .keywords: matrix, block, aij, compressed row, sparse, parallel
1984 
1985 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1986 @*/
1987 
1988 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
1989 {
1990   int ierr,size;
1991 
1992   PetscFunctionBegin;
1993   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1994   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1995   if (size > 1) {
1996     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1997     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1998   } else {
1999     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2000     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2001   }
2002   PetscFunctionReturn(0);
2003 }
2004 
2005 
2006 #undef __FUNCT__
2007 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2008 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2009 {
2010   Mat          mat;
2011   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2012   int          ierr,len=0;
2013 
2014   PetscFunctionBegin;
2015   *newmat       = 0;
2016   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2017   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2018   mat->preallocated = PETSC_TRUE;
2019   a = (Mat_MPISBAIJ*)mat->data;
2020   a->bs  = oldmat->bs;
2021   a->bs2 = oldmat->bs2;
2022   a->mbs = oldmat->mbs;
2023   a->nbs = oldmat->nbs;
2024   a->Mbs = oldmat->Mbs;
2025   a->Nbs = oldmat->Nbs;
2026 
2027   a->rstart       = oldmat->rstart;
2028   a->rend         = oldmat->rend;
2029   a->cstart       = oldmat->cstart;
2030   a->cend         = oldmat->cend;
2031   a->size         = oldmat->size;
2032   a->rank         = oldmat->rank;
2033   a->donotstash   = oldmat->donotstash;
2034   a->roworiented  = oldmat->roworiented;
2035   a->rowindices   = 0;
2036   a->rowvalues    = 0;
2037   a->getrowactive = PETSC_FALSE;
2038   a->barray       = 0;
2039   a->rstart_bs    = oldmat->rstart_bs;
2040   a->rend_bs      = oldmat->rend_bs;
2041   a->cstart_bs    = oldmat->cstart_bs;
2042   a->cend_bs      = oldmat->cend_bs;
2043 
2044   /* hash table stuff */
2045   a->ht           = 0;
2046   a->hd           = 0;
2047   a->ht_size      = 0;
2048   a->ht_flag      = oldmat->ht_flag;
2049   a->ht_fact      = oldmat->ht_fact;
2050   a->ht_total_ct  = 0;
2051   a->ht_insert_ct = 0;
2052 
2053   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2054   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2055   a->cowners    = a->rowners + a->size + 2;
2056   a->rowners_bs = a->cowners + a->size + 2;
2057   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2058   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2059   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2060   if (oldmat->colmap) {
2061 #if defined (PETSC_USE_CTABLE)
2062     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2063 #else
2064     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2065     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2066     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2067 #endif
2068   } else a->colmap = 0;
2069   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2070     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2071     PetscLogObjectMemory(mat,len*sizeof(int));
2072     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2073   } else a->garray = 0;
2074 
2075   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2076   PetscLogObjectParent(mat,a->lvec);
2077   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2078 
2079   PetscLogObjectParent(mat,a->Mvctx);
2080   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2081   PetscLogObjectParent(mat,a->A);
2082   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2083   PetscLogObjectParent(mat,a->B);
2084   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2085   *newmat = mat;
2086   PetscFunctionReturn(0);
2087 }
2088 
2089 #include "petscsys.h"
2090 
2091 #undef __FUNCT__
2092 #define __FUNCT__ "MatLoad_MPISBAIJ"
2093 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2094 {
2095   Mat          A;
2096   int          i,nz,ierr,j,rstart,rend,fd;
2097   PetscScalar  *vals,*buf;
2098   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2099   MPI_Status   status;
2100   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2101   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2102   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2103   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2104   int          dcount,kmax,k,nzcount,tmp;
2105 
2106   PetscFunctionBegin;
2107   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2108 
2109   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2110   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2111   if (!rank) {
2112     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2113     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2114     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not a matrix object");
2115     if (header[3] < 0) {
2116       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2117     }
2118   }
2119 
2120   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2121   M = header[1]; N = header[2];
2122 
2123   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2124 
2125   /*
2126      This code adds extra rows to make sure the number of rows is
2127      divisible by the blocksize
2128   */
2129   Mbs        = M/bs;
2130   extra_rows = bs - M + bs*(Mbs);
2131   if (extra_rows == bs) extra_rows = 0;
2132   else                  Mbs++;
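  /* worked example (illustrative values): M=10, bs=3 gives Mbs=3 and
     extra_rows = 3-10+9 = 2, so Mbs becomes 4 and the matrix is padded to 12
     rows; with M=12, bs=3 the formula gives extra_rows=bs, which is reset to 0 */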
2133   if (extra_rows && !rank) {
2134     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2135   }
2136 
2137   /* determine ownership of all rows */
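  /* each process gets floor(Mbs/size) block rows and the first Mbs%size
     processes get one extra, e.g. (illustrative) Mbs=10 on size=3 gives
     mbs = 4,3,3 for ranks 0,1,2 */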
2138   mbs        = Mbs/size + ((Mbs % size) > rank);
2139   m          = mbs*bs;
2140   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2141   browners   = rowners + size + 1;
2142   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2143   rowners[0] = 0;
2144   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2145   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2146   rstart = rowners[rank];
2147   rend   = rowners[rank+1];
2148 
2149   /* distribute row lengths to all processors */
2150   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2151   if (!rank) {
2152     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2153     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2154     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2155     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2156     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2157     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2158     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2159   } else {
2160     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2161   }
2162 
2163   if (!rank) {   /* procs[0] */
2164     /* calculate the number of nonzeros on each processor */
2165     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2166     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2167     for (i=0; i<size; i++) {
2168       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2169         procsnz[i] += rowlengths[j];
2170       }
2171     }
2172     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2173 
2174     /* determine max buffer needed and allocate it */
2175     maxnz = 0;
2176     for (i=0; i<size; i++) {
2177       maxnz = PetscMax(maxnz,procsnz[i]);
2178     }
2179     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2180 
2181     /* read in my part of the matrix column indices  */
2182     nz     = procsnz[0];
2183     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2184     mycols = ibuf;
2185     if (size == 1)  nz -= extra_rows;
2186     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2187     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2188 
2189     /* read in all other processors' parts (except the last) and ship them off */
2190     for (i=1; i<size-1; i++) {
2191       nz   = procsnz[i];
2192       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2193       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2194     }
2195     /* read in the part for the last processor */
2196     if (size != 1) {
2197       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2198       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2199       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2200       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2201     }
2202     ierr = PetscFree(cols);CHKERRQ(ierr);
2203   } else {  /* procs[i], i>0 */
2204     /* determine buffer space needed for message */
2205     nz = 0;
2206     for (i=0; i<m; i++) {
2207       nz += locrowlens[i];
2208     }
2209     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2210     mycols = ibuf;
2211     /* receive message of column indices*/
2212     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2213     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2214     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2215   }
2216 
2217   /* loop over local rows, determining number of off diagonal entries */
2218   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2219   odlens   = dlens + (rend-rstart);
2220   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2221   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2222   masked1  = mask    + Mbs;
2223   masked2  = masked1 + Mbs;
2224   rowcount = 0; nzcount = 0;
2225   for (i=0; i<mbs; i++) {
2226     dcount  = 0;
2227     odcount = 0;
2228     for (j=0; j<bs; j++) {
2229       kmax = locrowlens[rowcount];
2230       for (k=0; k<kmax; k++) {
2231         tmp = mycols[nzcount++]/bs; /* block col. index */
2232         if (!mask[tmp]) {
2233           mask[tmp] = 1;
2234           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2235           else masked1[dcount++] = tmp; /* entry in diag portion */
2236         }
2237       }
2238       rowcount++;
2239     }
2240 
2241     dlens[i]  = dcount;  /* d_nzz[i] */
2242     odlens[i] = odcount; /* o_nzz[i] */
2243 
2244     /* zero out the mask elements we set */
2245     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2246     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2247   }
2248 
2249   /* create our matrix */
2250   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2251   ierr = MatSetType(A,type);CHKERRQ(ierr);
2252   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2253   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2254 
2255   if (!rank) {
2256     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2257     /* read in my part of the matrix numerical values  */
2258     nz = procsnz[0];
2259     vals = buf;
2260     mycols = ibuf;
2261     if (size == 1)  nz -= extra_rows;
2262     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2263     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2264 
2265     /* insert into matrix */
2266     jj      = rstart*bs;
2267     for (i=0; i<m; i++) {
2268       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2269       mycols += locrowlens[i];
2270       vals   += locrowlens[i];
2271       jj++;
2272     }
2273 
2274     /* read in other processors (except the last one) and ship out */
2275     for (i=1; i<size-1; i++) {
2276       nz   = procsnz[i];
2277       vals = buf;
2278       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2279       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2280     }
2281     /* the last proc */
2282     if (size != 1) {
2283       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2284       vals = buf;
2285       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2286       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2287       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2288     }
2289     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2290 
2291   } else {
2292     /* receive numeric values */
2293     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2294 
2295     /* receive message of values*/
2296     vals   = buf;
2297     mycols = ibuf;
2298     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2299     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2300     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2301 
2302     /* insert into matrix */
2303     jj      = rstart*bs;
2304     for (i=0; i<m; i++) {
2305       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2306       mycols += locrowlens[i];
2307       vals   += locrowlens[i];
2308       jj++;
2309     }
2310   }
2311 
2312   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2313   ierr = PetscFree(buf);CHKERRQ(ierr);
2314   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2315   ierr = PetscFree(rowners);CHKERRQ(ierr);
2316   ierr = PetscFree(dlens);CHKERRQ(ierr);
2317   ierr = PetscFree(mask);CHKERRQ(ierr);
2318   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2319   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2320   *newmat = A;
2321   PetscFunctionReturn(0);
2322 }
2323 
2324 #undef __FUNCT__
2325 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2326 /*@
2327    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2328 
2329    Input Parameters:
2330 .  mat  - the matrix
2331 .  fact - factor
2332 
2333    Collective on Mat
2334 
2335    Level: advanced
2336 
2337   Notes:
2338    This can also be set by the command line option: -mat_use_hash_table fact
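   For example (illustrative value):
.vb
   -mat_use_hash_table 1.5
.ve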
2339 
2340 .keywords: matrix, hashtable, factor, HT
2341 
2342 .seealso: MatSetOption()
2343 @*/
2344 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2345 {
2346   PetscFunctionBegin;
2347   SETERRQ(1,"Function not yet written for SBAIJ format");
2348   /* PetscFunctionReturn(0); */
2349 }
2350 
2351 #undef __FUNCT__
2352 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2353 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2354 {
2355   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2356   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2357   PetscReal    atmp;
2358   PetscReal    *work,*svalues,*rvalues;
2359   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2360   int          rank,size,*rowners_bs,dest,count,source;
2361   PetscScalar  *va;
2362   MatScalar    *ba;
2363   MPI_Status   stat;
2364 
2365   PetscFunctionBegin;
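  /* Only the upper triangle is stored, so an entry in the off-diagonal block B
     also belongs, by symmetry, to a row owned by a higher-ranked process: each
     process accumulates the column maxima of |B| in work[] and ships the
     appropriate slices to those owners below. */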
2366   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2367   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2368 
2369   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2370   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2371 
2372   bs   = a->bs;
2373   mbs  = a->mbs;
2374   Mbs  = a->Mbs;
2375   ba   = b->a;
2376   bi   = b->i;
2377   bj   = b->j;
2382 
2383   /* find ownerships */
2384   rowners_bs = a->rowners_bs;
2390 
2391   /* each proc creates an array to be distributed */
2392   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2393   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2394 
2395   /* row_max for B */
2396   if (rank != size-1){
2397     for (i=0; i<mbs; i++) {
2398       ncols = bi[1] - bi[0]; bi++;
2399       brow  = bs*i;
2400       for (j=0; j<ncols; j++){
2401         bcol = bs*(*bj);
2402         for (kcol=0; kcol<bs; kcol++){
2403           col = bcol + kcol;                 /* local col index */
2404           col += rowners_bs[rank+1];      /* global col index */
2406           for (krow=0; krow<bs; krow++){
2407             atmp = PetscAbsScalar(*ba); ba++;
2408             row = brow + krow;    /* local row index */
2410             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2411             if (work[col] < atmp) work[col] = atmp;
2412           }
2413         }
2414         bj++;
2415       }
2416     }
2422 
2423     /* send values to their owners */
2424     for (dest=rank+1; dest<size; dest++){
2425       svalues = work + rowners_bs[dest];
2426       count   = rowners_bs[dest+1]-rowners_bs[dest];
2427       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2432     }
2433   }
2434 
2435   /* receive values */
2436   if (rank){
2437     rvalues = work;
2438     count   = rowners_bs[rank+1]-rowners_bs[rank];
2439     for (source=0; source<rank; source++){
2440       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2441       /* process values */
2442       for (i=0; i<count; i++){
2443         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2444       }
2449     }
2450   }
2451 
2452   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2453   ierr = PetscFree(work);CHKERRQ(ierr);
2454   PetscFunctionReturn(0);
2455 }
2456 
2457 #undef __FUNCT__
2458 #define __FUNCT__ "MatRelax_MPISBAIJ"
2459 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2460 {
2461   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2462   int            ierr,mbs=mat->mbs,bs=mat->bs;
2463   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2464   Vec            bb1;
2465 
2466   PetscFunctionBegin;
2467   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2468   if (bs > 1)
2469     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2470 
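  /* Each iteration below forms bb1 = bb - (off-diagonal part of A)*xx using only
     the stored upper triangle: B^T*xx supplies the strictly lower contributions
     (computed in slvec0b and scatter-added into slvec1a), while B applied to the
     scattered ghost values of -xx (slvec1b) supplies the strictly upper ones.
     A symmetric sweep with the local diagonal block A then updates xx. */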
2471   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2472     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2473       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2474       its--;
2475     }
2476 
2477     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2478     while (its--){
2479 
2480       /* lower triangular part: slvec0b = B^T*xx, negated below together with slvec0a */
2481       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2482 
2483       /* copy xx into slvec0a */
2484       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2485       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2486       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2487       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2488 
2489       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2490 
2491       /* copy bb into slvec1a */
2492       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2493       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2494       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2495       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2496 
2497       /* set slvec1b = 0 */
2498       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2499 
2500       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2501       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2502       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2503       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2504 
2505       /* upper triangular part: bb1 = B*slvec1b + slvec1a (slvec1b carries the -x ghost values) */
2506       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2507 
2508       /* local diagonal sweep */
2509       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2510     }
2511     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2512   } else {
2513     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2514   }
2515   PetscFunctionReturn(0);
2516 }
2517 
2518 #undef __FUNCT__
2519 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2520 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2521 {
2522   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2523   int            ierr;
2524   PetscScalar    mone=-1.0;
2525   Vec            lvec1,bb1;
2526 
2527   PetscFunctionBegin;
2528   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2529   if (mat->bs > 1)
2530     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2531 
2532   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2533     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2534       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2535       its--;
2536     }
2537 
2538     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2539     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2540     while (its--){
2541       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2542 
2543       /* lower triangular part: bb1 = bb - B^T*xx */
2544       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2545       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2546 
2547       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2548       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2549       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2550 
2551       /* upper triangular part: bb1 = bb1 - B*x */
2552       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2553       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2554 
2555       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2556 
2557       /* diagonal sweep */
2558       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2559     }
2560     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2561     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2562   } else {
2563     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2564   }
2565   PetscFunctionReturn(0);
2566 }
2567 
2568