xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 7010366f0b72775f46baed63aefa569e04d8b3c9)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
112         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
113         new_j = (int*)(new_a + bs2*new_nz); \
114         new_i = new_j + new_nz; \
115  \
116         /* copy over old data into new slots */ \
117         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
118         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
120         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
122         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
124         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
125                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
126         /* free up old matrix storage */ \
127         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
128         if (!a->singlemalloc) { \
129           ierr = PetscFree(a->i);CHKERRQ(ierr); \
130           ierr = PetscFree(a->j);CHKERRQ(ierr);\
131         } \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = PETSC_TRUE; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
154 #ifndef MatSetValues_SeqBAIJ_B_Private
155 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
156 { \
157     brow = row/bs;  \
158     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
159     rmax = bimax[brow]; nrow = bilen[brow]; \
160       bcol = col/bs; \
161       ridx = row % bs; cidx = col % bs; \
162       low = 0; high = nrow; \
163       while (high-low > 3) { \
164         t = (low+high)/2; \
165         if (rp[t] > bcol) high = t; \
166         else              low  = t; \
167       } \
168       for (_i=low; _i<high; _i++) { \
169         if (rp[_i] > bcol) break; \
170         if (rp[_i] == bcol) { \
171           bap  = ap +  bs2*_i + bs*cidx + ridx; \
172           if (addv == ADD_VALUES) *bap += value;  \
173           else                    *bap  = value;  \
174           goto b_noinsert; \
175         } \
176       } \
177       if (b->nonew == 1) goto b_noinsert; \
178       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
179       if (nrow >= rmax) { \
180         /* there is no extra room in row, therefore enlarge */ \
181         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
182         MatScalar *new_a; \
183  \
184         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
185  \
186         /* malloc new storage space */ \
187         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
188         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
189         new_j = (int*)(new_a + bs2*new_nz); \
190         new_i = new_j + new_nz; \
191  \
192         /* copy over old data into new slots */ \
193         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
194         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
195         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
196         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
197         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
198         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
201                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
202         /* free up old matrix storage */ \
203         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
204         if (!b->singlemalloc) { \
205           ierr = PetscFree(b->i);CHKERRQ(ierr); \
206           ierr = PetscFree(b->j);CHKERRQ(ierr); \
207         } \
208         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
209         b->singlemalloc = PETSC_TRUE; \
210  \
211         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
212         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
213         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
214         b->maxnz += bs2*CHUNKSIZE; \
215         b->reallocs++; \
216         b->nz++; \
217       } \
218       N = nrow++ - 1;  \
219       /* shift up all the later entries in this row */ \
220       for (ii=N; ii>=_i; ii--) { \
221         rp[ii+1] = rp[ii]; \
222         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223       } \
224       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
225       rp[_i]                      = bcol;  \
226       ap[bs2*_i + bs*cidx + ridx] = value;  \
227       b_noinsert:; \
228     bilen[brow] = nrow; \
229 }
230 #endif
231 
232 #if defined(PETSC_USE_MAT_SINGLE)
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValues_MPISBAIJ"
235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
238   int          ierr,i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
261   int         ierr,i,N = m*n*b->bs2;
262   MatScalar   *vsingle;
263 
264   PetscFunctionBegin;
265   if (N > b->setvalueslen) {
266     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
267     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
268     b->setvalueslen  = N;
269   }
270   vsingle = b->setvaluescopy;
271   for (i=0; i<N; i++) {
272     vsingle[i] = v[i];
273   }
274   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
281 {
282   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
283   int         ierr,i,N = m*n;
284   MatScalar   *vsingle;
285 
286   PetscFunctionBegin;
287   SETERRQ(1,"Function not yet written for SBAIJ format");
288   /* PetscFunctionReturn(0); */
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
294 {
295   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
296   int         ierr,i,N = m*n*b->bs2;
297   MatScalar   *vsingle;
298 
299   PetscFunctionBegin;
300   SETERRQ(1,"Function not yet written for SBAIJ format");
301   /* PetscFunctionReturn(0); */
302 }
303 #endif
304 
305 /* Only add/insert a(i,j) with i<=j (blocks).
306    Any a(i,j) with i>j input by user is ingored.
307 */
308 #undef __FUNCT__
309 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
311 {
312   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
313   MatScalar    value;
314   PetscTruth   roworiented = baij->roworiented;
315   int          ierr,i,j,row,col;
316   int          rstart_orig=baij->rstart_bs;
317   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
318   int          cend_orig=baij->cend_bs,bs=baij->bs;
319 
320   /* Some Variables required in the macro */
321   Mat          A = baij->A;
322   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
323   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
324   MatScalar    *aa=a->a;
325 
326   Mat          B = baij->B;
327   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
328   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
329   MatScalar    *ba=b->a;
330 
331   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
332   int          low,high,t,ridx,cidx,bs2=a->bs2;
333   MatScalar    *ap,*bap;
334 
335   /* for stash */
336   int          n_loc, *in_loc=0;
337   MatScalar    *v_loc=0;
338 
339   PetscFunctionBegin;
340 
341   if(!baij->donotstash){
342     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
343     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
344   }
345 
346   for (i=0; i<m; i++) {
347     if (im[i] < 0) continue;
348 #if defined(PETSC_USE_BOPT_g)
349     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
350 #endif
351     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
352       row = im[i] - rstart_orig;              /* local row index */
353       for (j=0; j<n; j++) {
354         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
355         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
356           col = in[j] - cstart_orig;          /* local col index */
357           brow = row/bs; bcol = col/bs;
358           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
359           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
360           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
361           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
362         } else if (in[j] < 0) continue;
363 #if defined(PETSC_USE_BOPT_g)
364         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
365 #endif
366         else {  /* off-diag entry (B) */
367           if (mat->was_assembled) {
368             if (!baij->colmap) {
369               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
370             }
371 #if defined (PETSC_USE_CTABLE)
372             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
373             col  = col - 1;
374 #else
375             col = baij->colmap[in[j]/bs] - 1;
376 #endif
377             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
378               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
379               col =  in[j];
380               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
381               B = baij->B;
382               b = (Mat_SeqBAIJ*)(B)->data;
383               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
384               ba=b->a;
385             } else col += in[j]%bs;
386           } else col = in[j];
387           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
388           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
389           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
390         }
391       }
392     } else {  /* off processor entry */
393       if (!baij->donotstash) {
394         n_loc = 0;
395         for (j=0; j<n; j++){
396           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
397           in_loc[n_loc] = in[j];
398           if (roworiented) {
399             v_loc[n_loc] = v[i*n+j];
400           } else {
401             v_loc[n_loc] = v[j*m+i];
402           }
403           n_loc++;
404         }
405         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
406       }
407     }
408   }
409 
410   if(!baij->donotstash){
411     ierr = PetscFree(in_loc);CHKERRQ(ierr);
412     ierr = PetscFree(v_loc);CHKERRQ(ierr);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
420 {
421   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
422   const MatScalar *value;
423   MatScalar       *barray=baij->barray;
424   PetscTruth      roworiented = baij->roworiented;
425   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
426   int             rend=baij->rend,cstart=baij->cstart,stepval;
427   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
428 
429   PetscFunctionBegin;
430   if(!barray) {
431     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
432     baij->barray = barray;
433   }
434 
435   if (roworiented) {
436     stepval = (n-1)*bs;
437   } else {
438     stepval = (m-1)*bs;
439   }
440   for (i=0; i<m; i++) {
441     if (im[i] < 0) continue;
442 #if defined(PETSC_USE_BOPT_g)
443     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
444 #endif
445     if (im[i] >= rstart && im[i] < rend) {
446       row = im[i] - rstart;
447       for (j=0; j<n; j++) {
448         /* If NumCol = 1 then a copy is not required */
449         if ((roworiented) && (n == 1)) {
450           barray = (MatScalar*) v + i*bs2;
451         } else if((!roworiented) && (m == 1)) {
452           barray = (MatScalar*) v + j*bs2;
453         } else { /* Here a copy is required */
454           if (roworiented) {
455             value = v + i*(stepval+bs)*bs + j*bs;
456           } else {
457             value = v + j*(stepval+bs)*bs + i*bs;
458           }
459           for (ii=0; ii<bs; ii++,value+=stepval) {
460             for (jj=0; jj<bs; jj++) {
461               *barray++  = *value++;
462             }
463           }
464           barray -=bs2;
465         }
466 
467         if (in[j] >= cstart && in[j] < cend){
468           col  = in[j] - cstart;
469           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
470         }
471         else if (in[j] < 0) continue;
472 #if defined(PETSC_USE_BOPT_g)
473         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
474 #endif
475         else {
476           if (mat->was_assembled) {
477             if (!baij->colmap) {
478               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
479             }
480 
481 #if defined(PETSC_USE_BOPT_g)
482 #if defined (PETSC_USE_CTABLE)
483             { int data;
484               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
485               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
486             }
487 #else
488             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
489 #endif
490 #endif
491 #if defined (PETSC_USE_CTABLE)
492 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
493             col  = (col - 1)/bs;
494 #else
495             col = (baij->colmap[in[j]] - 1)/bs;
496 #endif
497             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
498               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
499               col =  in[j];
500             }
501           }
502           else col = in[j];
503           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
504         }
505       }
506     } else {
507       if (!baij->donotstash) {
508         if (roworiented) {
509           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
510         } else {
511           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
512         }
513       }
514     }
515   }
516   PetscFunctionReturn(0);
517 }
518 
519 #define HASH_KEY 0.6180339887
520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
523 #undef __FUNCT__
524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
526 {
527   PetscFunctionBegin;
528   SETERRQ(1,"Function not yet written for SBAIJ format");
529   /* PetscFunctionReturn(0); */
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
535 {
536   PetscFunctionBegin;
537   SETERRQ(1,"Function not yet written for SBAIJ format");
538   /* PetscFunctionReturn(0); */
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatGetValues_MPISBAIJ"
543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
544 {
545   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
546   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
547   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
552     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
553     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
554       row = idxm[i] - bsrstart;
555       for (j=0; j<n; j++) {
556         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
557         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
558         if (idxn[j] >= bscstart && idxn[j] < bscend){
559           col = idxn[j] - bscstart;
560           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
561         } else {
562           if (!baij->colmap) {
563             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
564           }
565 #if defined (PETSC_USE_CTABLE)
566           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
567           data --;
568 #else
569           data = baij->colmap[idxn[j]/bs]-1;
570 #endif
571           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
572           else {
573             col  = data + idxn[j]%bs;
574             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575           }
576         }
577       }
578     } else {
579       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
580     }
581   }
582  PetscFunctionReturn(0);
583 }
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatNorm_MPISBAIJ"
587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
588 {
589   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
590   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
591   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
592   int        ierr;
593   PetscReal  sum[2],*lnorm2;
594 
595   PetscFunctionBegin;
596   if (baij->size == 1) {
597     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
598   } else {
599     if (type == NORM_FROBENIUS) {
600       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
601       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
602       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
603       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
605       /*
606       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
607       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
608       */
609       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
610       /*
611       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
612       PetscSynchronizedFlush(mat->comm); */
613 
614       *norm = sqrt(sum[0] + 2*sum[1]);
615       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 /*
624   Creates the hash table, and sets the table
625   This table is created only once.
626   If new entried need to be added to the matrix
627   then the hash table has to be destroyed and
628   recreated.
629 */
630 #undef __FUNCT__
631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
633 {
634   PetscFunctionBegin;
635   SETERRQ(1,"Function not yet written for SBAIJ format");
636   /* PetscFunctionReturn(0); */
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
642 {
643   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
644   int         ierr,nstash,reallocs;
645   InsertMode  addv;
646 
647   PetscFunctionBegin;
648   if (baij->donotstash) {
649     PetscFunctionReturn(0);
650   }
651 
652   /* make sure all processors are either in INSERTMODE or ADDMODE */
653   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
654   if (addv == (ADD_VALUES|INSERT_VALUES)) {
655     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
656   }
657   mat->insertmode = addv; /* in case this processor had no cache */
658 
659   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
663   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
664   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
665   PetscFunctionReturn(0);
666 }
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
673   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
674   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
675   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
676   int         *row,*col,other_disassembled;
677   PetscTruth  r1,r2,r3;
678   MatScalar   *val;
679   InsertMode  addv = mat->insertmode;
680 
681   PetscFunctionBegin;
682 
683   if (!baij->donotstash) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       /*
687       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
688       PetscSynchronizedFlush(mat->comm);
689       */
690       if (!flg) break;
691 
692       for (i=0; i<n;) {
693         /* Now identify the consecutive vals belonging to the same row */
694         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
695         if (j < n) ncols = j-i;
696         else       ncols = n-i;
697         /* Now assemble all these values with a single function call */
698         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
699         i = j;
700       }
701     }
702     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
703     /* Now process the block-stash. Since the values are stashed column-oriented,
704        set the roworiented flag to column oriented, and after MatSetValues()
705        restore the original flags */
706     r1 = baij->roworiented;
707     r2 = a->roworiented;
708     r3 = b->roworiented;
709     baij->roworiented = PETSC_FALSE;
710     a->roworiented    = PETSC_FALSE;
711     b->roworiented    = PETSC_FALSE;
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n;) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
719         if (j < n) ncols = j-i;
720         else       ncols = n-i;
721         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
722         i = j;
723       }
724     }
725     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
726     baij->roworiented = r1;
727     a->roworiented    = r2;
728     b->roworiented    = r3;
729   }
730 
731   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselfs, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746 
747   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
748     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
749   }
750   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
752 
753 #if defined(PETSC_USE_BOPT_g)
754   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
755     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
756     baij->ht_total_ct  = 0;
757     baij->ht_insert_ct = 0;
758   }
759 #endif
760   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
761     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
762     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
763     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
764   }
765 
766   if (baij->rowvalues) {
767     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
768     baij->rowvalues = 0;
769   }
770 
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
777 {
778   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
779   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
780   PetscTruth        isascii,isdraw;
781   PetscViewer       sviewer;
782   PetscViewerFormat format;
783 
784   PetscFunctionBegin;
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
786   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
787   if (isascii) {
788     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
789     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
790       MatInfo info;
791       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
793       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
794               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
795               baij->bs,(int)info.memory);CHKERRQ(ierr);
796       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
797       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
798       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
799       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
800       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
801       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_INFO) {
804       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
805       PetscFunctionReturn(0);
806     }
807   }
808 
809   if (isdraw) {
810     PetscDraw       draw;
811     PetscTruth isnull;
812     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
813     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
814   }
815 
816   if (size == 1) {
817     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
818     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
819   } else {
820     /* assemble the entire matrix onto first processor. */
821     Mat         A;
822     Mat_SeqSBAIJ *Aloc;
823     Mat_SeqBAIJ *Bloc;
824     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
825     MatScalar   *a;
826 
827     if (!rank) {
828       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
829     } else {
830       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
831     }
832     PetscLogObjectParent(mat,A);
833 
834     /* copy over the A part */
835     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
836     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
837     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
838 
839     for (i=0; i<mbs; i++) {
840       rvals[0] = bs*(baij->rstart + i);
841       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
842       for (j=ai[i]; j<ai[i+1]; j++) {
843         col = (baij->cstart+aj[j])*bs;
844         for (k=0; k<bs; k++) {
845           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
846           col++; a += bs;
847         }
848       }
849     }
850     /* copy over the B part */
851     Bloc = (Mat_SeqBAIJ*)baij->B->data;
852     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
853     for (i=0; i<mbs; i++) {
854       rvals[0] = bs*(baij->rstart + i);
855       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
856       for (j=ai[i]; j<ai[i+1]; j++) {
857         col = baij->garray[aj[j]]*bs;
858         for (k=0; k<bs; k++) {
859           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
860           col++; a += bs;
861         }
862       }
863     }
864     ierr = PetscFree(rvals);CHKERRQ(ierr);
865     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
867     /*
868        Everyone has to call to draw the matrix since the graphics waits are
869        synchronized across all processors that share the PetscDraw object
870     */
871     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
872     if (!rank) {
873       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
874       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
875     }
876     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
877     ierr = MatDestroy(A);CHKERRQ(ierr);
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "MatView_MPISBAIJ"
884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
885 {
886   int        ierr;
887   PetscTruth isascii,isdraw,issocket,isbinary;
888 
889   PetscFunctionBegin;
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
894   if (isascii || isdraw || issocket || isbinary) {
895     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
896   } else {
897     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
898   }
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatDestroy_MPISBAIJ"
904 int MatDestroy_MPISBAIJ(Mat mat)
905 {
906   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
907   int         ierr;
908 
909   PetscFunctionBegin;
910 #if defined(PETSC_USE_LOG)
911   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
912 #endif
913   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
914   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
915   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
916   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
917   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
918 #if defined (PETSC_USE_CTABLE)
919   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
920 #else
921   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
922 #endif
923   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
924   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
925   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
926   if (baij->slvec0) {
927     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
928     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
929   }
930   if (baij->slvec1) {
931     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
932     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
933     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
934   }
935   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
936   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
937   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
938   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
939 #if defined(PETSC_USE_MAT_SINGLE)
940   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
941 #endif
942   ierr = PetscFree(baij);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMult_MPISBAIJ"
948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
949 {
950   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
951   int         ierr,nt,mbs=a->mbs,bs=a->bs;
952   PetscScalar *x,*from,zero=0.0;
953 
954   PetscFunctionBegin;
955   /*
956   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
957   PetscSynchronizedFlush(A->comm);
958   */
959   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
960   if (nt != A->n) {
961     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
962   }
963   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
964   if (nt != A->m) {
965     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
966   }
967 
968   /* diagonal part */
969   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
970   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
971 
972   /* subdiagonal part */
973   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
974 
975   /* copy x into the vec slvec0 */
976   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
977   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
978   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
979   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
980 
981   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
982   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
983   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
984 
985   /* supperdiagonal part */
986   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
987 
988   PetscFunctionReturn(0);
989 }
990 
991 #undef __FUNCT__
992 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
993 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
994 {
995   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
996   int         ierr,nt;
997 
998   PetscFunctionBegin;
999   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1000   if (nt != A->n) {
1001     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1002   }
1003   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1004   if (nt != A->m) {
1005     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1006   }
1007 
1008   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1009   /* do diagonal part */
1010   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1011   /* do supperdiagonal part */
1012   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1013   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1014   /* do subdiagonal part */
1015   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1016   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1017   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1018 
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1024 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1025 {
1026   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1027   int          ierr,mbs=a->mbs,bs=a->bs;
1028   PetscScalar  *x,*from,zero=0.0;
1029 
1030   PetscFunctionBegin;
1031   /*
1032   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1033   PetscSynchronizedFlush(A->comm);
1034   */
1035   /* diagonal part */
1036   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1037   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1038 
1039   /* subdiagonal part */
1040   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1041 
1042   /* copy x into the vec slvec0 */
1043   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1044   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1045   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1046   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1047 
1048   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1049   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1050   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1051 
1052   /* supperdiagonal part */
1053   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1054 
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1060 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1061 {
1062   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1063   int        ierr;
1064 
1065   PetscFunctionBegin;
1066   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1067   /* do diagonal part */
1068   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1069   /* do supperdiagonal part */
1070   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1071   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1072 
1073   /* do subdiagonal part */
1074   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1075   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1076   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1077 
1078   PetscFunctionReturn(0);
1079 }
1080 
1081 #undef __FUNCT__
1082 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1083 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1084 {
1085   PetscFunctionBegin;
1086   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1087   /* PetscFunctionReturn(0); */
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1092 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1093 {
1094   PetscFunctionBegin;
1095   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1096   /* PetscFunctionReturn(0); */
1097 }
1098 
1099 /*
1100   This only works correctly for square matrices where the subblock A->A is the
1101    diagonal block
1102 */
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1105 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1106 {
1107   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1108   int         ierr;
1109 
1110   PetscFunctionBegin;
1111   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1112   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatScale_MPISBAIJ"
1118 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1119 {
1120   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1121   int         ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1125   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1131 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1132 {
1133   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1134   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1135   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1136   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1137   int            *cmap,*idx_p,cstart = mat->cstart;
1138 
1139   PetscFunctionBegin;
1140   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1141   mat->getrowactive = PETSC_TRUE;
1142 
1143   if (!mat->rowvalues && (idx || v)) {
1144     /*
1145         allocate enough space to hold information from the longest row.
1146     */
1147     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1148     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1149     int     max = 1,mbs = mat->mbs,tmp;
1150     for (i=0; i<mbs; i++) {
1151       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1152       if (max < tmp) { max = tmp; }
1153     }
1154     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1155     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1156   }
1157 
1158   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1159   lrow = row - brstart;  /* local row index */
1160 
1161   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1162   if (!v)   {pvA = 0; pvB = 0;}
1163   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1164   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1165   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1166   nztot = nzA + nzB;
1167 
1168   cmap  = mat->garray;
1169   if (v  || idx) {
1170     if (nztot) {
1171       /* Sort by increasing column numbers, assuming A and B already sorted */
1172       int imark = -1;
1173       if (v) {
1174         *v = v_p = mat->rowvalues;
1175         for (i=0; i<nzB; i++) {
1176           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1177           else break;
1178         }
1179         imark = i;
1180         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1181         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1182       }
1183       if (idx) {
1184         *idx = idx_p = mat->rowindices;
1185         if (imark > -1) {
1186           for (i=0; i<imark; i++) {
1187             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1188           }
1189         } else {
1190           for (i=0; i<nzB; i++) {
1191             if (cmap[cworkB[i]/bs] < cstart)
1192               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1193             else break;
1194           }
1195           imark = i;
1196         }
1197         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1198         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1199       }
1200     } else {
1201       if (idx) *idx = 0;
1202       if (v)   *v   = 0;
1203     }
1204   }
1205   *nz = nztot;
1206   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1207   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1213 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1214 {
1215   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1216 
1217   PetscFunctionBegin;
1218   if (baij->getrowactive == PETSC_FALSE) {
1219     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1220   }
1221   baij->getrowactive = PETSC_FALSE;
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1227 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1228 {
1229   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1230 
1231   PetscFunctionBegin;
1232   *bs = baij->bs;
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1238 int MatZeroEntries_MPISBAIJ(Mat A)
1239 {
1240   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1241   int         ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1245   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1251 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1252 {
1253   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1254   Mat         A = a->A,B = a->B;
1255   int         ierr;
1256   PetscReal   isend[5],irecv[5];
1257 
1258   PetscFunctionBegin;
1259   info->block_size     = (PetscReal)a->bs;
1260   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1261   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1262   isend[3] = info->memory;  isend[4] = info->mallocs;
1263   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1264   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1265   isend[3] += info->memory;  isend[4] += info->mallocs;
1266   if (flag == MAT_LOCAL) {
1267     info->nz_used      = isend[0];
1268     info->nz_allocated = isend[1];
1269     info->nz_unneeded  = isend[2];
1270     info->memory       = isend[3];
1271     info->mallocs      = isend[4];
1272   } else if (flag == MAT_GLOBAL_MAX) {
1273     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1274     info->nz_used      = irecv[0];
1275     info->nz_allocated = irecv[1];
1276     info->nz_unneeded  = irecv[2];
1277     info->memory       = irecv[3];
1278     info->mallocs      = irecv[4];
1279   } else if (flag == MAT_GLOBAL_SUM) {
1280     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1281     info->nz_used      = irecv[0];
1282     info->nz_allocated = irecv[1];
1283     info->nz_unneeded  = irecv[2];
1284     info->memory       = irecv[3];
1285     info->mallocs      = irecv[4];
1286   } else {
1287     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1288   }
1289   info->rows_global       = (PetscReal)A->M;
1290   info->columns_global    = (PetscReal)A->N;
1291   info->rows_local        = (PetscReal)A->m;
1292   info->columns_local     = (PetscReal)A->N;
1293   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1294   info->fill_ratio_needed = 0;
1295   info->factor_mallocs    = 0;
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1301 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1302 {
1303   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1304   int         ierr;
1305 
1306   PetscFunctionBegin;
1307   switch (op) {
1308   case MAT_NO_NEW_NONZERO_LOCATIONS:
1309   case MAT_YES_NEW_NONZERO_LOCATIONS:
1310   case MAT_COLUMNS_UNSORTED:
1311   case MAT_COLUMNS_SORTED:
1312   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1313   case MAT_KEEP_ZEROED_ROWS:
1314   case MAT_NEW_NONZERO_LOCATION_ERR:
1315     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1316     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1317     break;
1318   case MAT_ROW_ORIENTED:
1319     a->roworiented = PETSC_TRUE;
1320     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1321     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1322     break;
1323   case MAT_ROWS_SORTED:
1324   case MAT_ROWS_UNSORTED:
1325   case MAT_YES_NEW_DIAGONALS:
1326     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1327     break;
1328   case MAT_COLUMN_ORIENTED:
1329     a->roworiented = PETSC_FALSE;
1330     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1331     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1332     break;
1333   case MAT_IGNORE_OFF_PROC_ENTRIES:
1334     a->donotstash = PETSC_TRUE;
1335     break;
1336   case MAT_NO_NEW_DIAGONALS:
1337     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1338   case MAT_USE_HASH_TABLE:
1339     a->ht_flag = PETSC_TRUE;
1340     break;
1341   case MAT_NOT_SYMMETRIC:
1342   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1343   case MAT_HERMITIAN:
1344     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1345   case MAT_SYMMETRIC:
1346   case MAT_STRUCTURALLY_SYMMETRIC:
1347   case MAT_NOT_HERMITIAN:
1348   case MAT_SYMMETRY_ETERNAL:
1349   case MAT_NOT_SYMMETRY_ETERNAL:
1350     break;
1351   default:
1352     SETERRQ(PETSC_ERR_SUP,"unknown option");
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1359 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1360 {
1361   int ierr;
1362   PetscFunctionBegin;
1363   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1369 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1370 {
1371   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1372   Mat         a = baij->A,b = baij->B;
1373   int         ierr,s1,s2,s3;
1374 
1375   PetscFunctionBegin;
1376   if (ll != rr) {
1377     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1378   }
1379   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1380   if (rr) {
1381     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1382     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1383     /* Overlap communication with computation. */
1384     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1385     /*} if (ll) { */
1386     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1387     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1388     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1389     /* } */
1390   /* scale  the diagonal block */
1391   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1392 
1393   /* if (rr) { */
1394     /* Do a scatter end and then right scale the off-diagonal block */
1395     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1396     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1397   }
1398 
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1404 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1405 {
1406   PetscFunctionBegin;
1407   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1412 int MatPrintHelp_MPISBAIJ(Mat A)
1413 {
1414   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1415   MPI_Comm    comm = A->comm;
1416   static int  called = 0;
1417   int         ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!a->rank) {
1421     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1422   }
1423   if (called) {PetscFunctionReturn(0);} else called = 1;
1424   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1425   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1431 int MatSetUnfactored_MPISBAIJ(Mat A)
1432 {
1433   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1434   int         ierr;
1435 
1436   PetscFunctionBegin;
1437   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatEqual_MPISBAIJ"
1445 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1446 {
1447   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1448   Mat         a,b,c,d;
1449   PetscTruth  flg;
1450   int         ierr;
1451 
1452   PetscFunctionBegin;
1453   a = matA->A; b = matA->B;
1454   c = matB->A; d = matB->B;
1455 
1456   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1457   if (flg == PETSC_TRUE) {
1458     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1459   }
1460   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1466 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1467 {
1468   int        ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1477 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1478 {
1479   int        i,ierr;
1480   PetscTruth flg;
1481 
1482   for (i=0; i<n; i++) {
1483     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1484     if (!flg) {
1485       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1486     }
1487   }
1488   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 
1493 /* -------------------------------------------------------------------*/
1494 static struct _MatOps MatOps_Values = {
1495        MatSetValues_MPISBAIJ,
1496        MatGetRow_MPISBAIJ,
1497        MatRestoreRow_MPISBAIJ,
1498        MatMult_MPISBAIJ,
1499 /* 4*/ MatMultAdd_MPISBAIJ,
1500        MatMultTranspose_MPISBAIJ,
1501        MatMultTransposeAdd_MPISBAIJ,
1502        0,
1503        0,
1504        0,
1505 /*10*/ 0,
1506        0,
1507        0,
1508        MatRelax_MPISBAIJ,
1509        MatTranspose_MPISBAIJ,
1510 /*15*/ MatGetInfo_MPISBAIJ,
1511        MatEqual_MPISBAIJ,
1512        MatGetDiagonal_MPISBAIJ,
1513        MatDiagonalScale_MPISBAIJ,
1514        MatNorm_MPISBAIJ,
1515 /*20*/ MatAssemblyBegin_MPISBAIJ,
1516        MatAssemblyEnd_MPISBAIJ,
1517        0,
1518        MatSetOption_MPISBAIJ,
1519        MatZeroEntries_MPISBAIJ,
1520 /*25*/ MatZeroRows_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*35*/ MatDuplicate_MPISBAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*40*/ 0,
1536        MatGetSubMatrices_MPISBAIJ,
1537        0,
1538        MatGetValues_MPISBAIJ,
1539        0,
1540 /*45*/ MatPrintHelp_MPISBAIJ,
1541        MatScale_MPISBAIJ,
1542        0,
1543        0,
1544        0,
1545 /*50*/ MatGetBlockSize_MPISBAIJ,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*55*/ 0,
1551        0,
1552        MatSetUnfactored_MPISBAIJ,
1553        0,
1554        MatSetValuesBlocked_MPISBAIJ,
1555 /*60*/ 0,
1556        0,
1557        0,
1558        MatGetPetscMaps_Petsc,
1559        0,
1560 /*65*/ 0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*70*/ MatGetRowMax_MPISBAIJ,
1566        0,
1567        0,
1568        0,
1569        0,
1570 /*75*/ 0,
1571        0,
1572        0,
1573        0,
1574        0,
1575 /*80*/ 0,
1576        0,
1577        0,
1578        0,
1579 /*85*/ MatLoad_MPISBAIJ
1580 };
1581 
1582 
1583 EXTERN_C_BEGIN
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1586 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1587 {
1588   PetscFunctionBegin;
1589   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1590   *iscopy = PETSC_FALSE;
1591   PetscFunctionReturn(0);
1592 }
1593 EXTERN_C_END
1594 
1595 EXTERN_C_BEGIN
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1598 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1599 {
1600   Mat_MPISBAIJ *b;
1601   int          ierr,i,mbs,Mbs;
1602 
1603   PetscFunctionBegin;
1604   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1605 
1606   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1607   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1608   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1609   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1610   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1611   if (d_nnz) {
1612     for (i=0; i<B->m/bs; i++) {
1613       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1614     }
1615   }
1616   if (o_nnz) {
1617     for (i=0; i<B->m/bs; i++) {
1618       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1619     }
1620   }
1621   B->preallocated = PETSC_TRUE;
1622   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1623   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1624   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1625   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1626 
1627   b   = (Mat_MPISBAIJ*)B->data;
1628   mbs = B->m/bs;
1629   Mbs = B->M/bs;
1630   if (mbs*bs != B->m) {
1631     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1632   }
1633 
1634   b->bs  = bs;
1635   b->bs2 = bs*bs;
1636   b->mbs = mbs;
1637   b->nbs = mbs;
1638   b->Mbs = Mbs;
1639   b->Nbs = Mbs;
1640 
1641   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1642   b->rowners[0]    = 0;
1643   for (i=2; i<=b->size; i++) {
1644     b->rowners[i] += b->rowners[i-1];
1645   }
1646   b->rstart    = b->rowners[b->rank];
1647   b->rend      = b->rowners[b->rank+1];
1648   b->cstart    = b->rstart;
1649   b->cend      = b->rend;
1650   for (i=0; i<=b->size; i++) {
1651     b->rowners_bs[i] = b->rowners[i]*bs;
1652   }
1653   b->rstart_bs = b-> rstart*bs;
1654   b->rend_bs   = b->rend*bs;
1655 
1656   b->cstart_bs = b->cstart*bs;
1657   b->cend_bs   = b->cend*bs;
1658 
1659 
1660   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1661   PetscLogObjectParent(B,b->A);
1662   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1663   PetscLogObjectParent(B,b->B);
1664 
1665   /* build cache for off array entries formed */
1666   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1667 
1668   PetscFunctionReturn(0);
1669 }
1670 EXTERN_C_END
1671 
1672 /*MC
1673    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1674    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1675 
1676    Options Database Keys:
1677 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1678 
1679   Level: beginner
1680 
1681 .seealso: MatCreateMPISBAIJ
1682 M*/
1683 
1684 EXTERN_C_BEGIN
1685 #undef __FUNCT__
1686 #define __FUNCT__ "MatCreate_MPISBAIJ"
1687 int MatCreate_MPISBAIJ(Mat B)
1688 {
1689   Mat_MPISBAIJ *b;
1690   int          ierr;
1691   PetscTruth   flg;
1692 
1693   PetscFunctionBegin;
1694 
1695   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1696   B->data = (void*)b;
1697   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1698   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1699 
1700   B->ops->destroy    = MatDestroy_MPISBAIJ;
1701   B->ops->view       = MatView_MPISBAIJ;
1702   B->mapping    = 0;
1703   B->factor     = 0;
1704   B->assembled  = PETSC_FALSE;
1705 
1706   B->insertmode = NOT_SET_VALUES;
1707   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1708   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1709 
1710   /* build local table of row and column ownerships */
1711   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1712   b->cowners    = b->rowners + b->size + 2;
1713   b->rowners_bs = b->cowners + b->size + 2;
1714   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1715 
1716   /* build cache for off array entries formed */
1717   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1718   b->donotstash  = PETSC_FALSE;
1719   b->colmap      = PETSC_NULL;
1720   b->garray      = PETSC_NULL;
1721   b->roworiented = PETSC_TRUE;
1722 
1723 #if defined(PETSC_USE_MAT_SINGLE)
1724   /* stuff for MatSetValues_XXX in single precision */
1725   b->setvalueslen     = 0;
1726   b->setvaluescopy    = PETSC_NULL;
1727 #endif
1728 
1729   /* stuff used in block assembly */
1730   b->barray       = 0;
1731 
1732   /* stuff used for matrix vector multiply */
1733   b->lvec         = 0;
1734   b->Mvctx        = 0;
1735   b->slvec0       = 0;
1736   b->slvec0b      = 0;
1737   b->slvec1       = 0;
1738   b->slvec1a      = 0;
1739   b->slvec1b      = 0;
1740   b->sMvctx       = 0;
1741 
1742   /* stuff for MatGetRow() */
1743   b->rowindices   = 0;
1744   b->rowvalues    = 0;
1745   b->getrowactive = PETSC_FALSE;
1746 
1747   /* hash table stuff */
1748   b->ht           = 0;
1749   b->hd           = 0;
1750   b->ht_size      = 0;
1751   b->ht_flag      = PETSC_FALSE;
1752   b->ht_fact      = 0;
1753   b->ht_total_ct  = 0;
1754   b->ht_insert_ct = 0;
1755 
1756   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1757   if (flg) {
1758     PetscReal fact = 1.39;
1759     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1760     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1761     if (fact <= 1.0) fact = 1.39;
1762     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1763     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1764   }
1765   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1766                                      "MatStoreValues_MPISBAIJ",
1767                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1769                                      "MatRetrieveValues_MPISBAIJ",
1770                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1772                                      "MatGetDiagonalBlock_MPISBAIJ",
1773                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1775                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1776                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1777   PetscFunctionReturn(0);
1778 }
1779 EXTERN_C_END
1780 
1781 /*MC
1782    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1783 
1784    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1785    and MATMPISBAIJ otherwise.
1786 
1787    Options Database Keys:
1788 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1789 
1790   Level: beginner
1791 
1792 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1793 M*/
1794 
1795 EXTERN_C_BEGIN
1796 #undef __FUNCT__
1797 #define __FUNCT__ "MatCreate_SBAIJ"
1798 int MatCreate_SBAIJ(Mat A) {
1799   int ierr,size;
1800 
1801   PetscFunctionBegin;
1802   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1803   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1804   if (size == 1) {
1805     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1806   } else {
1807     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1808   }
1809   PetscFunctionReturn(0);
1810 }
1811 EXTERN_C_END
1812 
1813 #undef __FUNCT__
1814 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1815 /*@C
1816    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1817    the user should preallocate the matrix storage by setting the parameters
1818    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1819    performance can be increased by more than a factor of 50.
1820 
1821    Collective on Mat
1822 
1823    Input Parameters:
1824 +  A - the matrix
1825 .  bs   - size of blockk
1826 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1827            submatrix  (same for all local rows)
1828 .  d_nnz - array containing the number of block nonzeros in the various block rows
1829            in the upper triangular and diagonal part of the in diagonal portion of the local
1830            (possibly different for each block row) or PETSC_NULL.  You must leave room
1831            for the diagonal entry even if it is zero.
1832 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1833            submatrix (same for all local rows).
1834 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1835            off-diagonal portion of the local submatrix (possibly different for
1836            each block row) or PETSC_NULL.
1837 
1838 
1839    Options Database Keys:
1840 .   -mat_no_unroll - uses code that does not unroll the loops in the
1841                      block calculations (much slower)
1842 .   -mat_block_size - size of the blocks to use
1843 
1844    Notes:
1845 
1846    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1847    than it must be used on all processors that share the object for that argument.
1848 
1849    Storage Information:
1850    For a square global matrix we define each processor's diagonal portion
1851    to be its local rows and the corresponding columns (a square submatrix);
1852    each processor's off-diagonal portion encompasses the remainder of the
1853    local matrix (a rectangular submatrix).
1854 
1855    The user can specify preallocated storage for the diagonal part of
1856    the local submatrix with either d_nz or d_nnz (not both).  Set
1857    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1858    memory allocation.  Likewise, specify preallocated storage for the
1859    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1860 
1861    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1862    the figure below we depict these three local rows and all columns (0-11).
1863 
1864 .vb
1865            0 1 2 3 4 5 6 7 8 9 10 11
1866           -------------------
1867    row 3  |  o o o d d d o o o o o o
1868    row 4  |  o o o d d d o o o o o o
1869    row 5  |  o o o d d d o o o o o o
1870           -------------------
1871 .ve
1872 
1873    Thus, any entries in the d locations are stored in the d (diagonal)
1874    submatrix, and any entries in the o locations are stored in the
1875    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1876    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1877 
1878    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1879    plus the diagonal part of the d matrix,
1880    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1881    In general, for PDE problems in which most nonzeros are near the diagonal,
1882    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1883    or you will get TERRIBLE performance; see the users' manual chapter on
1884    matrices.
1885 
1886    Level: intermediate
1887 
1888 .keywords: matrix, block, aij, compressed row, sparse, parallel
1889 
1890 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1891 @*/
1892 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1893 {
1894   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1895 
1896   PetscFunctionBegin;
1897   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1898   if (f) {
1899     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1900   }
1901   PetscFunctionReturn(0);
1902 }
1903 
1904 #undef __FUNCT__
1905 #define __FUNCT__ "MatCreateMPISBAIJ"
1906 /*@C
1907    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1908    (block compressed row).  For good matrix assembly performance
1909    the user should preallocate the matrix storage by setting the parameters
1910    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1911    performance can be increased by more than a factor of 50.
1912 
1913    Collective on MPI_Comm
1914 
1915    Input Parameters:
1916 +  comm - MPI communicator
1917 .  bs   - size of blockk
1918 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1919            This value should be the same as the local size used in creating the
1920            y vector for the matrix-vector product y = Ax.
1921 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1922            This value should be the same as the local size used in creating the
1923            x vector for the matrix-vector product y = Ax.
1924 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1925 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1926 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1927            submatrix  (same for all local rows)
1928 .  d_nnz - array containing the number of block nonzeros in the various block rows
1929            in the upper triangular portion of the in diagonal portion of the local
1930            (possibly different for each block block row) or PETSC_NULL.
1931            You must leave room for the diagonal entry even if it is zero.
1932 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1933            submatrix (same for all local rows).
1934 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1935            off-diagonal portion of the local submatrix (possibly different for
1936            each block row) or PETSC_NULL.
1937 
1938    Output Parameter:
1939 .  A - the matrix
1940 
1941    Options Database Keys:
1942 .   -mat_no_unroll - uses code that does not unroll the loops in the
1943                      block calculations (much slower)
1944 .   -mat_block_size - size of the blocks to use
1945 .   -mat_mpi - use the parallel matrix data structures even on one processor
1946                (defaults to using SeqBAIJ format on one processor)
1947 
1948    Notes:
1949    The user MUST specify either the local or global matrix dimensions
1950    (possibly both).
1951 
1952    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1953    than it must be used on all processors that share the object for that argument.
1954 
1955    Storage Information:
1956    For a square global matrix we define each processor's diagonal portion
1957    to be its local rows and the corresponding columns (a square submatrix);
1958    each processor's off-diagonal portion encompasses the remainder of the
1959    local matrix (a rectangular submatrix).
1960 
1961    The user can specify preallocated storage for the diagonal part of
1962    the local submatrix with either d_nz or d_nnz (not both).  Set
1963    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1964    memory allocation.  Likewise, specify preallocated storage for the
1965    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1966 
1967    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1968    the figure below we depict these three local rows and all columns (0-11).
1969 
1970 .vb
1971            0 1 2 3 4 5 6 7 8 9 10 11
1972           -------------------
1973    row 3  |  o o o d d d o o o o o o
1974    row 4  |  o o o d d d o o o o o o
1975    row 5  |  o o o d d d o o o o o o
1976           -------------------
1977 .ve
1978 
1979    Thus, any entries in the d locations are stored in the d (diagonal)
1980    submatrix, and any entries in the o locations are stored in the
1981    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1982    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1983 
1984    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1985    plus the diagonal part of the d matrix,
1986    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1987    In general, for PDE problems in which most nonzeros are near the diagonal,
1988    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1989    or you will get TERRIBLE performance; see the users' manual chapter on
1990    matrices.
1991 
1992    Level: intermediate
1993 
1994 .keywords: matrix, block, aij, compressed row, sparse, parallel
1995 
1996 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1997 @*/
1998 
1999 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2000 {
2001   int ierr,size;
2002 
2003   PetscFunctionBegin;
2004   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2005   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2006   if (size > 1) {
2007     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2008     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2009   } else {
2010     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2011     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2012   }
2013   PetscFunctionReturn(0);
2014 }
2015 
2016 
2017 #undef __FUNCT__
2018 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2019 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2020 {
2021   Mat          mat;
2022   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2023   int          ierr,len=0;
2024 
2025   PetscFunctionBegin;
2026   *newmat       = 0;
2027   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2028   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2029   mat->preallocated = PETSC_TRUE;
2030   a = (Mat_MPISBAIJ*)mat->data;
2031   a->bs  = oldmat->bs;
2032   a->bs2 = oldmat->bs2;
2033   a->mbs = oldmat->mbs;
2034   a->nbs = oldmat->nbs;
2035   a->Mbs = oldmat->Mbs;
2036   a->Nbs = oldmat->Nbs;
2037 
2038   a->rstart       = oldmat->rstart;
2039   a->rend         = oldmat->rend;
2040   a->cstart       = oldmat->cstart;
2041   a->cend         = oldmat->cend;
2042   a->size         = oldmat->size;
2043   a->rank         = oldmat->rank;
2044   a->donotstash   = oldmat->donotstash;
2045   a->roworiented  = oldmat->roworiented;
2046   a->rowindices   = 0;
2047   a->rowvalues    = 0;
2048   a->getrowactive = PETSC_FALSE;
2049   a->barray       = 0;
2050   a->rstart_bs    = oldmat->rstart_bs;
2051   a->rend_bs      = oldmat->rend_bs;
2052   a->cstart_bs    = oldmat->cstart_bs;
2053   a->cend_bs      = oldmat->cend_bs;
2054 
2055   /* hash table stuff */
2056   a->ht           = 0;
2057   a->hd           = 0;
2058   a->ht_size      = 0;
2059   a->ht_flag      = oldmat->ht_flag;
2060   a->ht_fact      = oldmat->ht_fact;
2061   a->ht_total_ct  = 0;
2062   a->ht_insert_ct = 0;
2063 
2064   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2065   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2066   a->cowners    = a->rowners + a->size + 2;
2067   a->rowners_bs = a->cowners + a->size + 2;
2068   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2069   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2070   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2071   if (oldmat->colmap) {
2072 #if defined (PETSC_USE_CTABLE)
2073     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2074 #else
2075     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2076     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2077     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2078 #endif
2079   } else a->colmap = 0;
2080   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2081     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2082     PetscLogObjectMemory(mat,len*sizeof(int));
2083     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2084   } else a->garray = 0;
2085 
2086   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2087   PetscLogObjectParent(mat,a->lvec);
2088   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2089 
2090   PetscLogObjectParent(mat,a->Mvctx);
2091   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2092   PetscLogObjectParent(mat,a->A);
2093   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2094   PetscLogObjectParent(mat,a->B);
2095   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2096   *newmat = mat;
2097   PetscFunctionReturn(0);
2098 }
2099 
2100 #include "petscsys.h"
2101 
2102 #undef __FUNCT__
2103 #define __FUNCT__ "MatLoad_MPISBAIJ"
2104 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2105 {
2106   Mat          A;
2107   int          i,nz,ierr,j,rstart,rend,fd;
2108   PetscScalar  *vals,*buf;
2109   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2110   MPI_Status   status;
2111   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2112   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2113   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2114   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2115   int          dcount,kmax,k,nzcount,tmp;
2116 
2117   PetscFunctionBegin;
2118   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2119 
2120   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2121   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2122   if (!rank) {
2123     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2124     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2125     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2126     if (header[3] < 0) {
2127       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2128     }
2129   }
2130 
2131   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2132   M = header[1]; N = header[2];
2133 
2134   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2135 
2136   /*
2137      This code adds extra rows to make sure the number of rows is
2138      divisible by the blocksize
2139   */
2140   Mbs        = M/bs;
2141   extra_rows = bs - M + bs*(Mbs);
2142   if (extra_rows == bs) extra_rows = 0;
2143   else                  Mbs++;
2144   if (extra_rows &&!rank) {
2145     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2146   }
2147 
2148   /* determine ownership of all rows */
2149   mbs        = Mbs/size + ((Mbs % size) > rank);
2150   m          = mbs*bs;
2151   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2152   browners   = rowners + size + 1;
2153   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2154   rowners[0] = 0;
2155   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2156   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2157   rstart = rowners[rank];
2158   rend   = rowners[rank+1];
2159 
2160   /* distribute row lengths to all processors */
2161   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2162   if (!rank) {
2163     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2164     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2165     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2166     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2167     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2168     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2169     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2170   } else {
2171     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2172   }
2173 
2174   if (!rank) {   /* procs[0] */
2175     /* calculate the number of nonzeros on each processor */
2176     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2177     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2178     for (i=0; i<size; i++) {
2179       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2180         procsnz[i] += rowlengths[j];
2181       }
2182     }
2183     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2184 
2185     /* determine max buffer needed and allocate it */
2186     maxnz = 0;
2187     for (i=0; i<size; i++) {
2188       maxnz = PetscMax(maxnz,procsnz[i]);
2189     }
2190     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2191 
2192     /* read in my part of the matrix column indices  */
2193     nz     = procsnz[0];
2194     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2195     mycols = ibuf;
2196     if (size == 1)  nz -= extra_rows;
2197     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2198     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2199 
2200     /* read in every ones (except the last) and ship off */
2201     for (i=1; i<size-1; i++) {
2202       nz   = procsnz[i];
2203       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2204       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2205     }
2206     /* read in the stuff for the last proc */
2207     if (size != 1) {
2208       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2209       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2210       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2211       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2212     }
2213     ierr = PetscFree(cols);CHKERRQ(ierr);
2214   } else {  /* procs[i], i>0 */
2215     /* determine buffer space needed for message */
2216     nz = 0;
2217     for (i=0; i<m; i++) {
2218       nz += locrowlens[i];
2219     }
2220     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2221     mycols = ibuf;
2222     /* receive message of column indices*/
2223     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2224     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2225     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2226   }
2227 
2228   /* loop over local rows, determining number of off diagonal entries */
2229   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2230   odlens   = dlens + (rend-rstart);
2231   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2232   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2233   masked1  = mask    + Mbs;
2234   masked2  = masked1 + Mbs;
2235   rowcount = 0; nzcount = 0;
2236   for (i=0; i<mbs; i++) {
2237     dcount  = 0;
2238     odcount = 0;
2239     for (j=0; j<bs; j++) {
2240       kmax = locrowlens[rowcount];
2241       for (k=0; k<kmax; k++) {
2242         tmp = mycols[nzcount++]/bs; /* block col. index */
2243         if (!mask[tmp]) {
2244           mask[tmp] = 1;
2245           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2246           else masked1[dcount++] = tmp; /* entry in diag portion */
2247         }
2248       }
2249       rowcount++;
2250     }
2251 
2252     dlens[i]  = dcount;  /* d_nzz[i] */
2253     odlens[i] = odcount; /* o_nzz[i] */
2254 
2255     /* zero out the mask elements we set */
2256     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2257     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2258   }
2259 
2260   /* create our matrix */
2261   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2262   ierr = MatSetType(A,type);CHKERRQ(ierr);
2263   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2264   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2265 
2266   if (!rank) {
2267     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2268     /* read in my part of the matrix numerical values  */
2269     nz = procsnz[0];
2270     vals = buf;
2271     mycols = ibuf;
2272     if (size == 1)  nz -= extra_rows;
2273     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2274     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2275 
2276     /* insert into matrix */
2277     jj      = rstart*bs;
2278     for (i=0; i<m; i++) {
2279       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2280       mycols += locrowlens[i];
2281       vals   += locrowlens[i];
2282       jj++;
2283     }
2284 
2285     /* read in other processors (except the last one) and ship out */
2286     for (i=1; i<size-1; i++) {
2287       nz   = procsnz[i];
2288       vals = buf;
2289       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2290       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2291     }
2292     /* the last proc */
2293     if (size != 1){
2294       nz   = procsnz[i] - extra_rows;
2295       vals = buf;
2296       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2297       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2298       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2299     }
2300     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2301 
2302   } else {
2303     /* receive numeric values */
2304     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2305 
2306     /* receive message of values*/
2307     vals   = buf;
2308     mycols = ibuf;
2309     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2310     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2311     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2312 
2313     /* insert into matrix */
2314     jj      = rstart*bs;
2315     for (i=0; i<m; i++) {
2316       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2317       mycols += locrowlens[i];
2318       vals   += locrowlens[i];
2319       jj++;
2320     }
2321   }
2322 
2323   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2324   ierr = PetscFree(buf);CHKERRQ(ierr);
2325   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2326   ierr = PetscFree(rowners);CHKERRQ(ierr);
2327   ierr = PetscFree(dlens);CHKERRQ(ierr);
2328   ierr = PetscFree(mask);CHKERRQ(ierr);
2329   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2330   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2331   *newmat = A;
2332   PetscFunctionReturn(0);
2333 }
2334 
2335 #undef __FUNCT__
2336 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2337 /*@
2338    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2339 
2340    Input Parameters:
2341 .  mat  - the matrix
2342 .  fact - factor
2343 
2344    Collective on Mat
2345 
2346    Level: advanced
2347 
2348   Notes:
2349    This can also be set by the command line option: -mat_use_hash_table fact
2350 
2351 .keywords: matrix, hashtable, factor, HT
2352 
2353 .seealso: MatSetOption()
2354 @*/
2355 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2356 {
2357   PetscFunctionBegin;
2358   SETERRQ(1,"Function not yet written for SBAIJ format");
2359   /* PetscFunctionReturn(0); */
2360 }
2361 
2362 #undef __FUNCT__
2363 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2364 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2365 {
2366   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2367   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2368   PetscReal    atmp;
2369   PetscReal    *work,*svalues,*rvalues;
2370   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2371   int          rank,size,*rowners_bs,dest,count,source;
2372   PetscScalar  *va;
2373   MatScalar    *ba;
2374   MPI_Status   stat;
2375 
2376   PetscFunctionBegin;
2377   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2378   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2379 
2380   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2381   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2382 
2383   bs   = a->bs;
2384   mbs  = a->mbs;
2385   Mbs  = a->Mbs;
2386   ba   = b->a;
2387   bi   = b->i;
2388   bj   = b->j;
2389   /*
2390   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2391   PetscSynchronizedFlush(A->comm);
2392   */
2393 
2394   /* find ownerships */
2395   rowners_bs = a->rowners_bs;
2396   /*
2397   if (!rank){
2398     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2399   }
2400   */
2401 
2402   /* each proc creates an array to be distributed */
2403   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2404   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2405 
2406   /* row_max for B */
2407   if (rank != size-1){
2408     for (i=0; i<mbs; i++) {
2409       ncols = bi[1] - bi[0]; bi++;
2410       brow  = bs*i;
2411       for (j=0; j<ncols; j++){
2412         bcol = bs*(*bj);
2413         for (kcol=0; kcol<bs; kcol++){
2414           col = bcol + kcol;                 /* local col index */
2415           col += rowners_bs[rank+1];      /* global col index */
2416           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2417           for (krow=0; krow<bs; krow++){
2418             atmp = PetscAbsScalar(*ba); ba++;
2419             row = brow + krow;    /* local row index */
2420             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2421             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2422             if (work[col] < atmp) work[col] = atmp;
2423           }
2424         }
2425         bj++;
2426       }
2427     }
2428     /*
2429       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2430       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2431       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2432       */
2433 
2434     /* send values to its owners */
2435     for (dest=rank+1; dest<size; dest++){
2436       svalues = work + rowners_bs[dest];
2437       count   = rowners_bs[dest+1]-rowners_bs[dest];
2438       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2439       /*
2440       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2441       PetscSynchronizedFlush(A->comm);
2442       */
2443     }
2444   }
2445 
2446   /* receive values */
2447   if (rank){
2448     rvalues = work;
2449     count   = rowners_bs[rank+1]-rowners_bs[rank];
2450     for (source=0; source<rank; source++){
2451       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2452       /* process values */
2453       for (i=0; i<count; i++){
2454         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2455       }
2456       /*
2457       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2458       PetscSynchronizedFlush(A->comm);
2459       */
2460     }
2461   }
2462 
2463   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2464   ierr = PetscFree(work);CHKERRQ(ierr);
2465   PetscFunctionReturn(0);
2466 }
2467 
2468 #undef __FUNCT__
2469 #define __FUNCT__ "MatRelax_MPISBAIJ"
2470 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2471 {
2472   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2473   int            ierr,mbs=mat->mbs,bs=mat->bs;
2474   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2475   Vec            bb1;
2476 
2477   PetscFunctionBegin;
2478   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2479   if (bs > 1)
2480     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2481 
2482   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2483     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2484       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2485       its--;
2486     }
2487 
2488     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2489     while (its--){
2490 
2491       /* lower triangular part: slvec0b = - B^T*xx */
2492       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2493 
2494       /* copy xx into slvec0a */
2495       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2496       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2497       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2498       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2499 
2500       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2501 
2502       /* copy bb into slvec1a */
2503       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2504       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2505       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2506       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2507 
2508       /* set slvec1b = 0 */
2509       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2510 
2511       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2512       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2513       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2514       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2515 
2516       /* upper triangular part: bb1 = bb1 - B*x */
2517       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2518 
2519       /* local diagonal sweep */
2520       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2521     }
2522     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2523   } else {
2524     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2525   }
2526   PetscFunctionReturn(0);
2527 }
2528 
2529 #undef __FUNCT__
2530 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2531 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2532 {
2533   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2534   int            ierr;
2535   PetscScalar    mone=-1.0;
2536   Vec            lvec1,bb1;
2537 
2538   PetscFunctionBegin;
2539   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2540   if (mat->bs > 1)
2541     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2542 
2543   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2544     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2545       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2546       its--;
2547     }
2548 
2549     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2550     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2551     while (its--){
2552       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2553 
2554       /* lower diagonal part: bb1 = bb - B^T*xx */
2555       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2556       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2557 
2558       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2559       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2560       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2561 
2562       /* upper diagonal part: bb1 = bb1 - B*x */
2563       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2564       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2565 
2566       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2567 
2568       /* diagonal sweep */
2569       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2570     }
2571     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2572     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2573   } else {
2574     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2575   }
2576   PetscFunctionReturn(0);
2577 }
2578 
2579