xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 9c30b7d2697335155d7490a7e085415ee7b4a02a)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 extern int MatSetUpMultiply_MPISBAIJ(Mat);
7 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 extern int DisAssemble_MPISBAIJ(Mat);
9 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
10 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
11 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
21 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 int MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   int          ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 int MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   int          ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (int*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (int*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
237   int          ierr,i,N = m*n;
238   MatScalar    *vsingle;
239 
240   PetscFunctionBegin;
241   if (N > b->setvalueslen) {
242     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
243     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
244     b->setvalueslen  = N;
245   }
246   vsingle = b->setvaluescopy;
247 
248   for (i=0; i<N; i++) {
249     vsingle[i] = v[i];
250   }
251   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
258 {
259   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
260   int         ierr,i,N = m*n*b->bs2;
261   MatScalar   *vsingle;
262 
263   PetscFunctionBegin;
264   if (N > b->setvalueslen) {
265     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
266     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
267     b->setvalueslen  = N;
268   }
269   vsingle = b->setvaluescopy;
270   for (i=0; i<N; i++) {
271     vsingle[i] = v[i];
272   }
273   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
279 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
282   int         ierr,i,N = m*n;
283   MatScalar   *vsingle;
284 
285   PetscFunctionBegin;
286   SETERRQ(1,"Function not yet written for SBAIJ format");
287   /* PetscFunctionReturn(0); */
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
292 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
293 {
294   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
295   int         ierr,i,N = m*n*b->bs2;
296   MatScalar   *vsingle;
297 
298   PetscFunctionBegin;
299   SETERRQ(1,"Function not yet written for SBAIJ format");
300   /* PetscFunctionReturn(0); */
301 }
302 #endif
303 
304 /* Only add/insert a(i,j) with i<=j (blocks).
305    Any a(i,j) with i>j input by user is ingored.
306 */
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
309 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
310 {
311   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
312   MatScalar    value;
313   PetscTruth   roworiented = baij->roworiented;
314   int          ierr,i,j,row,col;
315   int          rstart_orig=baij->rstart_bs;
316   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
317   int          cend_orig=baij->cend_bs,bs=baij->bs;
318 
319   /* Some Variables required in the macro */
320   Mat          A = baij->A;
321   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
322   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
323   MatScalar    *aa=a->a;
324 
325   Mat          B = baij->B;
326   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
327   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
328   MatScalar    *ba=b->a;
329 
330   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
331   int          low,high,t,ridx,cidx,bs2=a->bs2;
332   MatScalar    *ap,*bap;
333 
334   /* for stash */
335   int          n_loc, *in_loc=0;
336   MatScalar    *v_loc=0;
337 
338   PetscFunctionBegin;
339 
340   if(!baij->donotstash){
341     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
342     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
343   }
344 
345   for (i=0; i<m; i++) {
346     if (im[i] < 0) continue;
347 #if defined(PETSC_USE_BOPT_g)
348     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
349 #endif
350     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
351       row = im[i] - rstart_orig;              /* local row index */
352       for (j=0; j<n; j++) {
353         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
354         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
355           col = in[j] - cstart_orig;          /* local col index */
356           brow = row/bs; bcol = col/bs;
357           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
358           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
359           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
360           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
361         } else if (in[j] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
364 #endif
365         else {  /* off-diag entry (B) */
366           if (mat->was_assembled) {
367             if (!baij->colmap) {
368               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
369             }
370 #if defined (PETSC_USE_CTABLE)
371             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
372             col  = col - 1;
373 #else
374             col = baij->colmap[in[j]/bs] - 1;
375 #endif
376             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
377               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
378               col =  in[j];
379               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
380               B = baij->B;
381               b = (Mat_SeqBAIJ*)(B)->data;
382               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
383               ba=b->a;
384             } else col += in[j]%bs;
385           } else col = in[j];
386           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
387           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
388           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
389         }
390       }
391     } else {  /* off processor entry */
392       if (!baij->donotstash) {
393         n_loc = 0;
394         for (j=0; j<n; j++){
395           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
396           in_loc[n_loc] = in[j];
397           if (roworiented) {
398             v_loc[n_loc] = v[i*n+j];
399           } else {
400             v_loc[n_loc] = v[j*m+i];
401           }
402           n_loc++;
403         }
404         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
405       }
406     }
407   }
408 
409   if(!baij->donotstash){
410     ierr = PetscFree(in_loc);CHKERRQ(ierr);
411     ierr = PetscFree(v_loc);CHKERRQ(ierr);
412   }
413   PetscFunctionReturn(0);
414 }
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
418 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
419 {
420   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
421   const MatScalar *value;
422   MatScalar       *barray=baij->barray;
423   PetscTruth      roworiented = baij->roworiented;
424   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
425   int             rend=baij->rend,cstart=baij->cstart,stepval;
426   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
427 
428   PetscFunctionBegin;
429   if(!barray) {
430     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
431     baij->barray = barray;
432   }
433 
434   if (roworiented) {
435     stepval = (n-1)*bs;
436   } else {
437     stepval = (m-1)*bs;
438   }
439   for (i=0; i<m; i++) {
440     if (im[i] < 0) continue;
441 #if defined(PETSC_USE_BOPT_g)
442     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
443 #endif
444     if (im[i] >= rstart && im[i] < rend) {
445       row = im[i] - rstart;
446       for (j=0; j<n; j++) {
447         /* If NumCol = 1 then a copy is not required */
448         if ((roworiented) && (n == 1)) {
449           barray = (MatScalar*) v + i*bs2;
450         } else if((!roworiented) && (m == 1)) {
451           barray = (MatScalar*) v + j*bs2;
452         } else { /* Here a copy is required */
453           if (roworiented) {
454             value = v + i*(stepval+bs)*bs + j*bs;
455           } else {
456             value = v + j*(stepval+bs)*bs + i*bs;
457           }
458           for (ii=0; ii<bs; ii++,value+=stepval) {
459             for (jj=0; jj<bs; jj++) {
460               *barray++  = *value++;
461             }
462           }
463           barray -=bs2;
464         }
465 
466         if (in[j] >= cstart && in[j] < cend){
467           col  = in[j] - cstart;
468           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
469         }
470         else if (in[j] < 0) continue;
471 #if defined(PETSC_USE_BOPT_g)
472         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
473 #endif
474         else {
475           if (mat->was_assembled) {
476             if (!baij->colmap) {
477               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
478             }
479 
480 #if defined(PETSC_USE_BOPT_g)
481 #if defined (PETSC_USE_CTABLE)
482             { int data;
483               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
484               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
485             }
486 #else
487             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
488 #endif
489 #endif
490 #if defined (PETSC_USE_CTABLE)
491 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
492             col  = (col - 1)/bs;
493 #else
494             col = (baij->colmap[in[j]] - 1)/bs;
495 #endif
496             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
497               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
498               col =  in[j];
499             }
500           }
501           else col = in[j];
502           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
503         }
504       }
505     } else {
506       if (!baij->donotstash) {
507         if (roworiented) {
508           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
509         } else {
510           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
511         }
512       }
513     }
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 #define HASH_KEY 0.6180339887
519 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
520 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
521 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 #undef __FUNCT__
523 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
524 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
525 {
526   PetscFunctionBegin;
527   SETERRQ(1,"Function not yet written for SBAIJ format");
528   /* PetscFunctionReturn(0); */
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
533 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
534 {
535   PetscFunctionBegin;
536   SETERRQ(1,"Function not yet written for SBAIJ format");
537   /* PetscFunctionReturn(0); */
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatGetValues_MPISBAIJ"
542 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
543 {
544   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
545   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
546   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
547 
548   PetscFunctionBegin;
549   for (i=0; i<m; i++) {
550     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
551     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
552     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
553       row = idxm[i] - bsrstart;
554       for (j=0; j<n; j++) {
555         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
556         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
557         if (idxn[j] >= bscstart && idxn[j] < bscend){
558           col = idxn[j] - bscstart;
559           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
560         } else {
561           if (!baij->colmap) {
562             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
563           }
564 #if defined (PETSC_USE_CTABLE)
565           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
566           data --;
567 #else
568           data = baij->colmap[idxn[j]/bs]-1;
569 #endif
570           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
571           else {
572             col  = data + idxn[j]%bs;
573             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
574           }
575         }
576       }
577     } else {
578       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
579     }
580   }
581  PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatNorm_MPISBAIJ"
586 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
587 {
588   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
589   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
590   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
591   int        ierr;
592   PetscReal  sum[2],*lnorm2;
593 
594   PetscFunctionBegin;
595   if (baij->size == 1) {
596     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
597   } else {
598     if (type == NORM_FROBENIUS) {
599       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
600       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
601       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
602       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
603       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
604       /*
605       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
606       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
607       */
608       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
609       /*
610       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
611       PetscSynchronizedFlush(mat->comm); */
612 
613       *norm = sqrt(sum[0] + 2*sum[1]);
614       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
615     } else {
616       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
617     }
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 /*
623   Creates the hash table, and sets the table
624   This table is created only once.
625   If new entried need to be added to the matrix
626   then the hash table has to be destroyed and
627   recreated.
628 */
629 #undef __FUNCT__
630 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
631 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
632 {
633   PetscFunctionBegin;
634   SETERRQ(1,"Function not yet written for SBAIJ format");
635   /* PetscFunctionReturn(0); */
636 }
637 
638 #undef __FUNCT__
639 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
640 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
641 {
642   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
643   int         ierr,nstash,reallocs;
644   InsertMode  addv;
645 
646   PetscFunctionBegin;
647   if (baij->donotstash) {
648     PetscFunctionReturn(0);
649   }
650 
651   /* make sure all processors are either in INSERTMODE or ADDMODE */
652   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
653   if (addv == (ADD_VALUES|INSERT_VALUES)) {
654     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
655   }
656   mat->insertmode = addv; /* in case this processor had no cache */
657 
658   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
659   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
660   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
661   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
662   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
663   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
669 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
670 {
671   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
672   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
673   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
674   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
675   int         *row,*col,other_disassembled;
676   PetscTruth  r1,r2,r3;
677   MatScalar   *val;
678   InsertMode  addv = mat->insertmode;
679 
680   PetscFunctionBegin;
681 
682   if (!baij->donotstash) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       /*
686       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
687       PetscSynchronizedFlush(mat->comm);
688       */
689       if (!flg) break;
690 
691       for (i=0; i<n;) {
692         /* Now identify the consecutive vals belonging to the same row */
693         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
694         if (j < n) ncols = j-i;
695         else       ncols = n-i;
696         /* Now assemble all these values with a single function call */
697         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702     /* Now process the block-stash. Since the values are stashed column-oriented,
703        set the roworiented flag to column oriented, and after MatSetValues()
704        restore the original flags */
705     r1 = baij->roworiented;
706     r2 = a->roworiented;
707     r3 = b->roworiented;
708     baij->roworiented = PETSC_FALSE;
709     a->roworiented    = PETSC_FALSE;
710     b->roworiented    = PETSC_FALSE;
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
718         if (j < n) ncols = j-i;
719         else       ncols = n-i;
720         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
721         i = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
725     baij->roworiented = r1;
726     a->roworiented    = r2;
727     b->roworiented    = r3;
728   }
729 
730   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
731   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
732 
733   /* determine if any processor has disassembled, if so we must
734      also disassemble ourselfs, in order that we may reassemble. */
735   /*
736      if nonzero structure of submatrix B cannot change then we know that
737      no processor disassembled thus we can skip this stuff
738   */
739   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
740     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
741     if (mat->was_assembled && !other_disassembled) {
742       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
743     }
744   }
745 
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
748   }
749   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
750   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
751 
752 #if defined(PETSC_USE_BOPT_g)
753   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
754     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
755     baij->ht_total_ct  = 0;
756     baij->ht_insert_ct = 0;
757   }
758 #endif
759   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
760     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
761     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
762     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
763   }
764 
765   if (baij->rowvalues) {
766     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
767     baij->rowvalues = 0;
768   }
769 
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
775 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
776 {
777   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
778   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
779   PetscTruth        isascii,isdraw;
780   PetscViewer       sviewer;
781   PetscViewerFormat format;
782 
783   PetscFunctionBegin;
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
786   if (isascii) {
787     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
788     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
789       MatInfo info;
790       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
791       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
793               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
794               baij->bs,(int)info.memory);CHKERRQ(ierr);
795       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
796       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
797       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
798       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
799       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
800       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
801       PetscFunctionReturn(0);
802     } else if (format == PETSC_VIEWER_ASCII_INFO) {
803       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw       draw;
810     PetscTruth isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
813   }
814 
815   if (size == 1) {
816     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
817     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
818   } else {
819     /* assemble the entire matrix onto first processor. */
820     Mat         A;
821     Mat_SeqSBAIJ *Aloc;
822     Mat_SeqBAIJ *Bloc;
823     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
824     MatScalar   *a;
825 
826     /* Should this be the same type as mat? */
827     if (!rank) {
828       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
829     } else {
830       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
831     }
832     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
833     ierr = MatMPISBAIJSetPreallocation(A,baij->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
834     PetscLogObjectParent(mat,A);
835 
836     /* copy over the A part */
837     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
838     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
839     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
840 
841     for (i=0; i<mbs; i++) {
842       rvals[0] = bs*(baij->rstart + i);
843       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
844       for (j=ai[i]; j<ai[i+1]; j++) {
845         col = (baij->cstart+aj[j])*bs;
846         for (k=0; k<bs; k++) {
847           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
848           col++; a += bs;
849         }
850       }
851     }
852     /* copy over the B part */
853     Bloc = (Mat_SeqBAIJ*)baij->B->data;
854     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
855     for (i=0; i<mbs; i++) {
856       rvals[0] = bs*(baij->rstart + i);
857       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
858       for (j=ai[i]; j<ai[i+1]; j++) {
859         col = baij->garray[aj[j]]*bs;
860         for (k=0; k<bs; k++) {
861           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
862           col++; a += bs;
863         }
864       }
865     }
866     ierr = PetscFree(rvals);CHKERRQ(ierr);
867     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
868     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
869     /*
870        Everyone has to call to draw the matrix since the graphics waits are
871        synchronized across all processors that share the PetscDraw object
872     */
873     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
874     if (!rank) {
875       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
876       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
877     }
878     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
879     ierr = MatDestroy(A);CHKERRQ(ierr);
880   }
881   PetscFunctionReturn(0);
882 }
883 
884 #undef __FUNCT__
885 #define __FUNCT__ "MatView_MPISBAIJ"
886 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
887 {
888   int        ierr;
889   PetscTruth isascii,isdraw,issocket,isbinary;
890 
891   PetscFunctionBegin;
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
894   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
895   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
896   if (isascii || isdraw || issocket || isbinary) {
897     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
898   } else {
899     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
900   }
901   PetscFunctionReturn(0);
902 }
903 
904 #undef __FUNCT__
905 #define __FUNCT__ "MatDestroy_MPISBAIJ"
906 int MatDestroy_MPISBAIJ(Mat mat)
907 {
908   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
909   int         ierr;
910 
911   PetscFunctionBegin;
912 #if defined(PETSC_USE_LOG)
913   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
914 #endif
915   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
916   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
917   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
918   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
919   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
920 #if defined (PETSC_USE_CTABLE)
921   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
922 #else
923   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
924 #endif
925   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
926   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
927   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
928   if (baij->slvec0) {
929     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
930     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
931   }
932   if (baij->slvec1) {
933     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
934     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
935     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
936   }
937   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
938   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
939   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
940   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
941 #if defined(PETSC_USE_MAT_SINGLE)
942   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
943 #endif
944   ierr = PetscFree(baij);CHKERRQ(ierr);
945   PetscFunctionReturn(0);
946 }
947 
948 #undef __FUNCT__
949 #define __FUNCT__ "MatMult_MPISBAIJ"
950 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
951 {
952   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
953   int         ierr,nt,mbs=a->mbs,bs=a->bs;
954   PetscScalar *x,*from,zero=0.0;
955 
956   PetscFunctionBegin;
957   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
958   if (nt != A->n) {
959     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
960   }
961   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
962   if (nt != A->m) {
963     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
964   }
965 
966   /* diagonal part */
967   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
968   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
969 
970   /* subdiagonal part */
971   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
972 
973   /* copy x into the vec slvec0 */
974   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
975   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
976   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
977   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
978 
979   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
980   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
981   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
982 
983   /* supperdiagonal part */
984   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
985 
986   PetscFunctionReturn(0);
987 }
988 
989 #undef __FUNCT__
990 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
991 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
992 {
993   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
994   int         ierr,nt;
995 
996   PetscFunctionBegin;
997   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
998   if (nt != A->n) {
999     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1000   }
1001   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1002   if (nt != A->m) {
1003     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1004   }
1005 
1006   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1007   /* do diagonal part */
1008   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1009   /* do supperdiagonal part */
1010   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1011   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1012   /* do subdiagonal part */
1013   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1014   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1015   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1016 
1017   PetscFunctionReturn(0);
1018 }
1019 
1020 #undef __FUNCT__
1021 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1022 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1023 {
1024   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1025   int          ierr,mbs=a->mbs,bs=a->bs;
1026   PetscScalar  *x,*from,zero=0.0;
1027 
1028   PetscFunctionBegin;
1029   /*
1030   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1031   PetscSynchronizedFlush(A->comm);
1032   */
1033   /* diagonal part */
1034   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1035   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1036 
1037   /* subdiagonal part */
1038   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1039 
1040   /* copy x into the vec slvec0 */
1041   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1042   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1043   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1044   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1045 
1046   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1047   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1048   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1049 
1050   /* supperdiagonal part */
1051   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1052 
1053   PetscFunctionReturn(0);
1054 }
1055 
1056 #undef __FUNCT__
1057 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1058 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1059 {
1060   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1061   int        ierr;
1062 
1063   PetscFunctionBegin;
1064   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1065   /* do diagonal part */
1066   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1067   /* do supperdiagonal part */
1068   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1069   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1070 
1071   /* do subdiagonal part */
1072   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1073   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1074   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1075 
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 #undef __FUNCT__
1080 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1081 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1082 {
1083   int ierr;
1084 
1085   PetscFunctionBegin;
1086   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1087   PetscFunctionReturn(0);
1088 }
1089 
1090 #undef __FUNCT__
1091 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1092 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1093 {
1094   int ierr;
1095 
1096   PetscFunctionBegin;
1097   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 /*
1102   This only works correctly for square matrices where the subblock A->A is the
1103    diagonal block
1104 */
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1107 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1108 {
1109   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1110   int         ierr;
1111 
1112   PetscFunctionBegin;
1113   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1114   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatScale_MPISBAIJ"
1120 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1121 {
1122   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1123   int         ierr;
1124 
1125   PetscFunctionBegin;
1126   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1127   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1128   PetscFunctionReturn(0);
1129 }
1130 
1131 #undef __FUNCT__
1132 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1133 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1134 {
1135   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1136   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1137   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1138   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1139   int            *cmap,*idx_p,cstart = mat->cstart;
1140 
1141   PetscFunctionBegin;
1142   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1143   mat->getrowactive = PETSC_TRUE;
1144 
1145   if (!mat->rowvalues && (idx || v)) {
1146     /*
1147         allocate enough space to hold information from the longest row.
1148     */
1149     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1150     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1151     int     max = 1,mbs = mat->mbs,tmp;
1152     for (i=0; i<mbs; i++) {
1153       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1154       if (max < tmp) { max = tmp; }
1155     }
1156     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1157     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1158   }
1159 
1160   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1161   lrow = row - brstart;  /* local row index */
1162 
1163   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1164   if (!v)   {pvA = 0; pvB = 0;}
1165   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1166   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1167   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1168   nztot = nzA + nzB;
1169 
1170   cmap  = mat->garray;
1171   if (v  || idx) {
1172     if (nztot) {
1173       /* Sort by increasing column numbers, assuming A and B already sorted */
1174       int imark = -1;
1175       if (v) {
1176         *v = v_p = mat->rowvalues;
1177         for (i=0; i<nzB; i++) {
1178           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1179           else break;
1180         }
1181         imark = i;
1182         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1183         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1184       }
1185       if (idx) {
1186         *idx = idx_p = mat->rowindices;
1187         if (imark > -1) {
1188           for (i=0; i<imark; i++) {
1189             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1190           }
1191         } else {
1192           for (i=0; i<nzB; i++) {
1193             if (cmap[cworkB[i]/bs] < cstart)
1194               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1195             else break;
1196           }
1197           imark = i;
1198         }
1199         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1200         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1201       }
1202     } else {
1203       if (idx) *idx = 0;
1204       if (v)   *v   = 0;
1205     }
1206   }
1207   *nz = nztot;
1208   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1209   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1215 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1216 {
1217   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1218 
1219   PetscFunctionBegin;
1220   if (baij->getrowactive == PETSC_FALSE) {
1221     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1222   }
1223   baij->getrowactive = PETSC_FALSE;
1224   PetscFunctionReturn(0);
1225 }
1226 
1227 #undef __FUNCT__
1228 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1229 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1230 {
1231   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1232 
1233   PetscFunctionBegin;
1234   *bs = baij->bs;
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 #undef __FUNCT__
1239 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1240 int MatZeroEntries_MPISBAIJ(Mat A)
1241 {
1242   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1243   int         ierr;
1244 
1245   PetscFunctionBegin;
1246   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1247   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1253 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1254 {
1255   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1256   Mat         A = a->A,B = a->B;
1257   int         ierr;
1258   PetscReal   isend[5],irecv[5];
1259 
1260   PetscFunctionBegin;
1261   info->block_size     = (PetscReal)a->bs;
1262   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1263   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1264   isend[3] = info->memory;  isend[4] = info->mallocs;
1265   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1266   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1267   isend[3] += info->memory;  isend[4] += info->mallocs;
1268   if (flag == MAT_LOCAL) {
1269     info->nz_used      = isend[0];
1270     info->nz_allocated = isend[1];
1271     info->nz_unneeded  = isend[2];
1272     info->memory       = isend[3];
1273     info->mallocs      = isend[4];
1274   } else if (flag == MAT_GLOBAL_MAX) {
1275     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1276     info->nz_used      = irecv[0];
1277     info->nz_allocated = irecv[1];
1278     info->nz_unneeded  = irecv[2];
1279     info->memory       = irecv[3];
1280     info->mallocs      = irecv[4];
1281   } else if (flag == MAT_GLOBAL_SUM) {
1282     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1283     info->nz_used      = irecv[0];
1284     info->nz_allocated = irecv[1];
1285     info->nz_unneeded  = irecv[2];
1286     info->memory       = irecv[3];
1287     info->mallocs      = irecv[4];
1288   } else {
1289     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1290   }
1291   info->rows_global       = (PetscReal)A->M;
1292   info->columns_global    = (PetscReal)A->N;
1293   info->rows_local        = (PetscReal)A->m;
1294   info->columns_local     = (PetscReal)A->N;
1295   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1296   info->fill_ratio_needed = 0;
1297   info->factor_mallocs    = 0;
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1303 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1304 {
1305   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1306   int         ierr;
1307 
1308   PetscFunctionBegin;
1309   switch (op) {
1310   case MAT_NO_NEW_NONZERO_LOCATIONS:
1311   case MAT_YES_NEW_NONZERO_LOCATIONS:
1312   case MAT_COLUMNS_UNSORTED:
1313   case MAT_COLUMNS_SORTED:
1314   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1315   case MAT_KEEP_ZEROED_ROWS:
1316   case MAT_NEW_NONZERO_LOCATION_ERR:
1317     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1318     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1319     break;
1320   case MAT_ROW_ORIENTED:
1321     a->roworiented = PETSC_TRUE;
1322     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1323     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1324     break;
1325   case MAT_ROWS_SORTED:
1326   case MAT_ROWS_UNSORTED:
1327   case MAT_YES_NEW_DIAGONALS:
1328     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1329     break;
1330   case MAT_COLUMN_ORIENTED:
1331     a->roworiented = PETSC_FALSE;
1332     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1333     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1334     break;
1335   case MAT_IGNORE_OFF_PROC_ENTRIES:
1336     a->donotstash = PETSC_TRUE;
1337     break;
1338   case MAT_NO_NEW_DIAGONALS:
1339     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1340   case MAT_USE_HASH_TABLE:
1341     a->ht_flag = PETSC_TRUE;
1342     break;
1343   case MAT_NOT_SYMMETRIC:
1344   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1345   case MAT_HERMITIAN:
1346     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1347   case MAT_SYMMETRIC:
1348   case MAT_STRUCTURALLY_SYMMETRIC:
1349   case MAT_NOT_HERMITIAN:
1350   case MAT_SYMMETRY_ETERNAL:
1351   case MAT_NOT_SYMMETRY_ETERNAL:
1352     break;
1353   default:
1354     SETERRQ(PETSC_ERR_SUP,"unknown option");
1355   }
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1361 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1362 {
1363   int ierr;
1364   PetscFunctionBegin;
1365   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1371 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1372 {
1373   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1374   Mat         a = baij->A,b = baij->B;
1375   int         ierr,s1,s2,s3;
1376 
1377   PetscFunctionBegin;
1378   if (ll != rr) {
1379     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1380   }
1381   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1382   if (rr) {
1383     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1384     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1385     /* Overlap communication with computation. */
1386     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1387     /*} if (ll) { */
1388     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1389     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1390     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1391     /* } */
1392   /* scale  the diagonal block */
1393   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1394 
1395   /* if (rr) { */
1396     /* Do a scatter end and then right scale the off-diagonal block */
1397     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1398     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1399   }
1400 
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1406 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1407 {
1408   PetscFunctionBegin;
1409   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1410 }
1411 
1412 #undef __FUNCT__
1413 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1414 int MatPrintHelp_MPISBAIJ(Mat A)
1415 {
1416   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1417   MPI_Comm    comm = A->comm;
1418   static int  called = 0;
1419   int         ierr;
1420 
1421   PetscFunctionBegin;
1422   if (!a->rank) {
1423     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1424   }
1425   if (called) {PetscFunctionReturn(0);} else called = 1;
1426   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1427   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1428   PetscFunctionReturn(0);
1429 }
1430 
1431 #undef __FUNCT__
1432 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1433 int MatSetUnfactored_MPISBAIJ(Mat A)
1434 {
1435   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1436   int         ierr;
1437 
1438   PetscFunctionBegin;
1439   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1440   PetscFunctionReturn(0);
1441 }
1442 
1443 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1444 
1445 #undef __FUNCT__
1446 #define __FUNCT__ "MatEqual_MPISBAIJ"
1447 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1448 {
1449   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1450   Mat         a,b,c,d;
1451   PetscTruth  flg;
1452   int         ierr;
1453 
1454   PetscFunctionBegin;
1455   a = matA->A; b = matA->B;
1456   c = matB->A; d = matB->B;
1457 
1458   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1459   if (flg == PETSC_TRUE) {
1460     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1461   }
1462   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1468 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1469 {
1470   int        ierr;
1471 
1472   PetscFunctionBegin;
1473   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1474   PetscFunctionReturn(0);
1475 }
1476 
1477 #undef __FUNCT__
1478 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1479 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1480 {
1481   int        i,ierr;
1482   PetscTruth flg;
1483 
1484   for (i=0; i<n; i++) {
1485     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1486     if (!flg) {
1487       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1488     }
1489   }
1490   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 
1495 /* -------------------------------------------------------------------*/
1496 static struct _MatOps MatOps_Values = {
1497        MatSetValues_MPISBAIJ,
1498        MatGetRow_MPISBAIJ,
1499        MatRestoreRow_MPISBAIJ,
1500        MatMult_MPISBAIJ,
1501 /* 4*/ MatMultAdd_MPISBAIJ,
1502        MatMultTranspose_MPISBAIJ,
1503        MatMultTransposeAdd_MPISBAIJ,
1504        0,
1505        0,
1506        0,
1507 /*10*/ 0,
1508        0,
1509        0,
1510        MatRelax_MPISBAIJ,
1511        MatTranspose_MPISBAIJ,
1512 /*15*/ MatGetInfo_MPISBAIJ,
1513        MatEqual_MPISBAIJ,
1514        MatGetDiagonal_MPISBAIJ,
1515        MatDiagonalScale_MPISBAIJ,
1516        MatNorm_MPISBAIJ,
1517 /*20*/ MatAssemblyBegin_MPISBAIJ,
1518        MatAssemblyEnd_MPISBAIJ,
1519        0,
1520        MatSetOption_MPISBAIJ,
1521        MatZeroEntries_MPISBAIJ,
1522 /*25*/ MatZeroRows_MPISBAIJ,
1523        0,
1524        0,
1525        0,
1526        0,
1527 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1528        0,
1529        0,
1530        0,
1531        0,
1532 /*35*/ MatDuplicate_MPISBAIJ,
1533        0,
1534        0,
1535        0,
1536        0,
1537 /*40*/ 0,
1538        MatGetSubMatrices_MPISBAIJ,
1539        MatIncreaseOverlap_MPISBAIJ,
1540        MatGetValues_MPISBAIJ,
1541        0,
1542 /*45*/ MatPrintHelp_MPISBAIJ,
1543        MatScale_MPISBAIJ,
1544        0,
1545        0,
1546        0,
1547 /*50*/ MatGetBlockSize_MPISBAIJ,
1548        0,
1549        0,
1550        0,
1551        0,
1552 /*55*/ 0,
1553        0,
1554        MatSetUnfactored_MPISBAIJ,
1555        0,
1556        MatSetValuesBlocked_MPISBAIJ,
1557 /*60*/ 0,
1558        0,
1559        0,
1560        MatGetPetscMaps_Petsc,
1561        0,
1562 /*65*/ 0,
1563        0,
1564        0,
1565        0,
1566        0,
1567 /*70*/ MatGetRowMax_MPISBAIJ,
1568        0,
1569        0,
1570        0,
1571        0,
1572 /*75*/ 0,
1573        0,
1574        0,
1575        0,
1576        0,
1577 /*80*/ 0,
1578        0,
1579        0,
1580        0,
1581 /*85*/ MatLoad_MPISBAIJ
1582 };
1583 
1584 
1585 EXTERN_C_BEGIN
1586 #undef __FUNCT__
1587 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1588 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1589 {
1590   PetscFunctionBegin;
1591   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1592   *iscopy = PETSC_FALSE;
1593   PetscFunctionReturn(0);
1594 }
1595 EXTERN_C_END
1596 
1597 EXTERN_C_BEGIN
1598 #undef __FUNCT__
1599 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1600 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1601 {
1602   Mat_MPISBAIJ *b;
1603   int          ierr,i,mbs,Mbs;
1604 
1605   PetscFunctionBegin;
1606   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1607 
1608   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1609   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1610   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1611   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1612   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1613   if (d_nnz) {
1614     for (i=0; i<B->m/bs; i++) {
1615       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1616     }
1617   }
1618   if (o_nnz) {
1619     for (i=0; i<B->m/bs; i++) {
1620       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1621     }
1622   }
1623   B->preallocated = PETSC_TRUE;
1624   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1625   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1626   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1627   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1628 
1629   b   = (Mat_MPISBAIJ*)B->data;
1630   mbs = B->m/bs;
1631   Mbs = B->M/bs;
1632   if (mbs*bs != B->m) {
1633     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1634   }
1635 
1636   b->bs  = bs;
1637   b->bs2 = bs*bs;
1638   b->mbs = mbs;
1639   b->nbs = mbs;
1640   b->Mbs = Mbs;
1641   b->Nbs = Mbs;
1642 
1643   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1644   b->rowners[0]    = 0;
1645   for (i=2; i<=b->size; i++) {
1646     b->rowners[i] += b->rowners[i-1];
1647   }
1648   b->rstart    = b->rowners[b->rank];
1649   b->rend      = b->rowners[b->rank+1];
1650   b->cstart    = b->rstart;
1651   b->cend      = b->rend;
1652   for (i=0; i<=b->size; i++) {
1653     b->rowners_bs[i] = b->rowners[i]*bs;
1654   }
1655   b->rstart_bs = b-> rstart*bs;
1656   b->rend_bs   = b->rend*bs;
1657 
1658   b->cstart_bs = b->cstart*bs;
1659   b->cend_bs   = b->cend*bs;
1660 
1661   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1662   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1663   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1664   PetscLogObjectParent(B,b->A);
1665 
1666   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1667   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1668   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1669   PetscLogObjectParent(B,b->B);
1670 
1671   /* build cache for off array entries formed */
1672   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1673 
1674   PetscFunctionReturn(0);
1675 }
1676 EXTERN_C_END
1677 
1678 /*MC
1679    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1680    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1681 
1682    Options Database Keys:
1683 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1684 
1685   Level: beginner
1686 
1687 .seealso: MatCreateMPISBAIJ
1688 M*/
1689 
1690 EXTERN_C_BEGIN
1691 #undef __FUNCT__
1692 #define __FUNCT__ "MatCreate_MPISBAIJ"
1693 int MatCreate_MPISBAIJ(Mat B)
1694 {
1695   Mat_MPISBAIJ *b;
1696   int          ierr;
1697   PetscTruth   flg;
1698 
1699   PetscFunctionBegin;
1700 
1701   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1702   B->data = (void*)b;
1703   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1704   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1705 
1706   B->ops->destroy    = MatDestroy_MPISBAIJ;
1707   B->ops->view       = MatView_MPISBAIJ;
1708   B->mapping    = 0;
1709   B->factor     = 0;
1710   B->assembled  = PETSC_FALSE;
1711 
1712   B->insertmode = NOT_SET_VALUES;
1713   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1714   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1715 
1716   /* build local table of row and column ownerships */
1717   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1718   b->cowners    = b->rowners + b->size + 2;
1719   b->rowners_bs = b->cowners + b->size + 2;
1720   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1721 
1722   /* build cache for off array entries formed */
1723   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1724   b->donotstash  = PETSC_FALSE;
1725   b->colmap      = PETSC_NULL;
1726   b->garray      = PETSC_NULL;
1727   b->roworiented = PETSC_TRUE;
1728 
1729 #if defined(PETSC_USE_MAT_SINGLE)
1730   /* stuff for MatSetValues_XXX in single precision */
1731   b->setvalueslen     = 0;
1732   b->setvaluescopy    = PETSC_NULL;
1733 #endif
1734 
1735   /* stuff used in block assembly */
1736   b->barray       = 0;
1737 
1738   /* stuff used for matrix vector multiply */
1739   b->lvec         = 0;
1740   b->Mvctx        = 0;
1741   b->slvec0       = 0;
1742   b->slvec0b      = 0;
1743   b->slvec1       = 0;
1744   b->slvec1a      = 0;
1745   b->slvec1b      = 0;
1746   b->sMvctx       = 0;
1747 
1748   /* stuff for MatGetRow() */
1749   b->rowindices   = 0;
1750   b->rowvalues    = 0;
1751   b->getrowactive = PETSC_FALSE;
1752 
1753   /* hash table stuff */
1754   b->ht           = 0;
1755   b->hd           = 0;
1756   b->ht_size      = 0;
1757   b->ht_flag      = PETSC_FALSE;
1758   b->ht_fact      = 0;
1759   b->ht_total_ct  = 0;
1760   b->ht_insert_ct = 0;
1761 
1762   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1763   if (flg) {
1764     PetscReal fact = 1.39;
1765     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1766     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1767     if (fact <= 1.0) fact = 1.39;
1768     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1769     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1770   }
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1772                                      "MatStoreValues_MPISBAIJ",
1773                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1775                                      "MatRetrieveValues_MPISBAIJ",
1776                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1778                                      "MatGetDiagonalBlock_MPISBAIJ",
1779                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1780   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1781                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1782                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1783   B->symmetric                  = PETSC_TRUE;
1784   B->structurally_symmetric     = PETSC_TRUE;
1785   B->symmetric_set              = PETSC_TRUE;
1786   B->structurally_symmetric_set = PETSC_TRUE;
1787   PetscFunctionReturn(0);
1788 }
1789 EXTERN_C_END
1790 
1791 /*MC
1792    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1793 
1794    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1795    and MATMPISBAIJ otherwise.
1796 
1797    Options Database Keys:
1798 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1799 
1800   Level: beginner
1801 
1802 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1803 M*/
1804 
1805 EXTERN_C_BEGIN
1806 #undef __FUNCT__
1807 #define __FUNCT__ "MatCreate_SBAIJ"
1808 int MatCreate_SBAIJ(Mat A) {
1809   int ierr,size;
1810 
1811   PetscFunctionBegin;
1812   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1813   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1814   if (size == 1) {
1815     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1816   } else {
1817     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1818   }
1819   PetscFunctionReturn(0);
1820 }
1821 EXTERN_C_END
1822 
1823 #undef __FUNCT__
1824 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1825 /*@C
1826    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1827    the user should preallocate the matrix storage by setting the parameters
1828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1829    performance can be increased by more than a factor of 50.
1830 
1831    Collective on Mat
1832 
1833    Input Parameters:
1834 +  A - the matrix
1835 .  bs   - size of blockk
1836 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1837            submatrix  (same for all local rows)
1838 .  d_nnz - array containing the number of block nonzeros in the various block rows
1839            in the upper triangular and diagonal part of the in diagonal portion of the local
1840            (possibly different for each block row) or PETSC_NULL.  You must leave room
1841            for the diagonal entry even if it is zero.
1842 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1843            submatrix (same for all local rows).
1844 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1845            off-diagonal portion of the local submatrix (possibly different for
1846            each block row) or PETSC_NULL.
1847 
1848 
1849    Options Database Keys:
1850 .   -mat_no_unroll - uses code that does not unroll the loops in the
1851                      block calculations (much slower)
1852 .   -mat_block_size - size of the blocks to use
1853 
1854    Notes:
1855 
1856    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1857    than it must be used on all processors that share the object for that argument.
1858 
1859    Storage Information:
1860    For a square global matrix we define each processor's diagonal portion
1861    to be its local rows and the corresponding columns (a square submatrix);
1862    each processor's off-diagonal portion encompasses the remainder of the
1863    local matrix (a rectangular submatrix).
1864 
1865    The user can specify preallocated storage for the diagonal part of
1866    the local submatrix with either d_nz or d_nnz (not both).  Set
1867    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1868    memory allocation.  Likewise, specify preallocated storage for the
1869    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1870 
1871    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1872    the figure below we depict these three local rows and all columns (0-11).
1873 
1874 .vb
1875            0 1 2 3 4 5 6 7 8 9 10 11
1876           -------------------
1877    row 3  |  o o o d d d o o o o o o
1878    row 4  |  o o o d d d o o o o o o
1879    row 5  |  o o o d d d o o o o o o
1880           -------------------
1881 .ve
1882 
1883    Thus, any entries in the d locations are stored in the d (diagonal)
1884    submatrix, and any entries in the o locations are stored in the
1885    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1886    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1887 
1888    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1889    plus the diagonal part of the d matrix,
1890    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1891    In general, for PDE problems in which most nonzeros are near the diagonal,
1892    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1893    or you will get TERRIBLE performance; see the users' manual chapter on
1894    matrices.
1895 
1896    Level: intermediate
1897 
1898 .keywords: matrix, block, aij, compressed row, sparse, parallel
1899 
1900 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1901 @*/
1902 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1903 {
1904   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1905 
1906   PetscFunctionBegin;
1907   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1908   if (f) {
1909     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1910   }
1911   PetscFunctionReturn(0);
1912 }
1913 
1914 #undef __FUNCT__
1915 #define __FUNCT__ "MatCreateMPISBAIJ"
1916 /*@C
1917    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1918    (block compressed row).  For good matrix assembly performance
1919    the user should preallocate the matrix storage by setting the parameters
1920    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1921    performance can be increased by more than a factor of 50.
1922 
1923    Collective on MPI_Comm
1924 
1925    Input Parameters:
1926 +  comm - MPI communicator
1927 .  bs   - size of blockk
1928 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1929            This value should be the same as the local size used in creating the
1930            y vector for the matrix-vector product y = Ax.
1931 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1932            This value should be the same as the local size used in creating the
1933            x vector for the matrix-vector product y = Ax.
1934 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1935 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1936 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1937            submatrix  (same for all local rows)
1938 .  d_nnz - array containing the number of block nonzeros in the various block rows
1939            in the upper triangular portion of the in diagonal portion of the local
1940            (possibly different for each block block row) or PETSC_NULL.
1941            You must leave room for the diagonal entry even if it is zero.
1942 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1943            submatrix (same for all local rows).
1944 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1945            off-diagonal portion of the local submatrix (possibly different for
1946            each block row) or PETSC_NULL.
1947 
1948    Output Parameter:
1949 .  A - the matrix
1950 
1951    Options Database Keys:
1952 .   -mat_no_unroll - uses code that does not unroll the loops in the
1953                      block calculations (much slower)
1954 .   -mat_block_size - size of the blocks to use
1955 .   -mat_mpi - use the parallel matrix data structures even on one processor
1956                (defaults to using SeqBAIJ format on one processor)
1957 
1958    Notes:
1959    The user MUST specify either the local or global matrix dimensions
1960    (possibly both).
1961 
1962    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1963    than it must be used on all processors that share the object for that argument.
1964 
1965    Storage Information:
1966    For a square global matrix we define each processor's diagonal portion
1967    to be its local rows and the corresponding columns (a square submatrix);
1968    each processor's off-diagonal portion encompasses the remainder of the
1969    local matrix (a rectangular submatrix).
1970 
1971    The user can specify preallocated storage for the diagonal part of
1972    the local submatrix with either d_nz or d_nnz (not both).  Set
1973    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1974    memory allocation.  Likewise, specify preallocated storage for the
1975    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1976 
1977    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1978    the figure below we depict these three local rows and all columns (0-11).
1979 
1980 .vb
1981            0 1 2 3 4 5 6 7 8 9 10 11
1982           -------------------
1983    row 3  |  o o o d d d o o o o o o
1984    row 4  |  o o o d d d o o o o o o
1985    row 5  |  o o o d d d o o o o o o
1986           -------------------
1987 .ve
1988 
1989    Thus, any entries in the d locations are stored in the d (diagonal)
1990    submatrix, and any entries in the o locations are stored in the
1991    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1992    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1993 
1994    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1995    plus the diagonal part of the d matrix,
1996    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1997    In general, for PDE problems in which most nonzeros are near the diagonal,
1998    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1999    or you will get TERRIBLE performance; see the users' manual chapter on
2000    matrices.
2001 
2002    Level: intermediate
2003 
2004 .keywords: matrix, block, aij, compressed row, sparse, parallel
2005 
2006 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2007 @*/
2008 
2009 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2010 {
2011   int ierr,size;
2012 
2013   PetscFunctionBegin;
2014   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2015   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2016   if (size > 1) {
2017     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2018     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2019   } else {
2020     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2021     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2022   }
2023   PetscFunctionReturn(0);
2024 }
2025 
2026 
2027 #undef __FUNCT__
2028 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2029 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2030 {
2031   Mat          mat;
2032   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2033   int          ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2034   PetscScalar  *array;
2035 
2036   PetscFunctionBegin;
2037   *newmat       = 0;
2038   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2039   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2040 
2041   mat->factor       = matin->factor;
2042   mat->preallocated = PETSC_TRUE;
2043   mat->assembled    = PETSC_TRUE;
2044   mat->insertmode   = NOT_SET_VALUES;
2045 
2046   a = (Mat_MPISBAIJ*)mat->data;
2047   a->bs  = oldmat->bs;
2048   a->bs2 = oldmat->bs2;
2049   a->mbs = oldmat->mbs;
2050   a->nbs = oldmat->nbs;
2051   a->Mbs = oldmat->Mbs;
2052   a->Nbs = oldmat->Nbs;
2053 
2054   a->rstart       = oldmat->rstart;
2055   a->rend         = oldmat->rend;
2056   a->cstart       = oldmat->cstart;
2057   a->cend         = oldmat->cend;
2058   a->size         = oldmat->size;
2059   a->rank         = oldmat->rank;
2060   a->donotstash   = oldmat->donotstash;
2061   a->roworiented  = oldmat->roworiented;
2062   a->rowindices   = 0;
2063   a->rowvalues    = 0;
2064   a->getrowactive = PETSC_FALSE;
2065   a->barray       = 0;
2066   a->rstart_bs    = oldmat->rstart_bs;
2067   a->rend_bs      = oldmat->rend_bs;
2068   a->cstart_bs    = oldmat->cstart_bs;
2069   a->cend_bs      = oldmat->cend_bs;
2070 
2071   /* hash table stuff */
2072   a->ht           = 0;
2073   a->hd           = 0;
2074   a->ht_size      = 0;
2075   a->ht_flag      = oldmat->ht_flag;
2076   a->ht_fact      = oldmat->ht_fact;
2077   a->ht_total_ct  = 0;
2078   a->ht_insert_ct = 0;
2079 
2080   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2081   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2082   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2083   if (oldmat->colmap) {
2084 #if defined (PETSC_USE_CTABLE)
2085     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2086 #else
2087     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2088     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2089     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2090 #endif
2091   } else a->colmap = 0;
2092 
2093   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2094     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2095     PetscLogObjectMemory(mat,len*sizeof(int));
2096     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2097   } else a->garray = 0;
2098 
2099   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2100   PetscLogObjectParent(mat,a->lvec);
2101   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2102   PetscLogObjectParent(mat,a->Mvctx);
2103 
2104   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2105   PetscLogObjectParent(mat,a->slvec0);
2106   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2107   PetscLogObjectParent(mat,a->slvec1);
2108 
2109   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2110   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2111   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2112   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2113   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2114   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2115   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2116   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2117   PetscLogObjectParent(mat,a->slvec0);
2118   PetscLogObjectParent(mat,a->slvec1);
2119   PetscLogObjectParent(mat,a->slvec0b);
2120   PetscLogObjectParent(mat,a->slvec1a);
2121   PetscLogObjectParent(mat,a->slvec1b);
2122 
2123   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2124   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2125   a->sMvctx = oldmat->sMvctx;
2126   PetscLogObjectParent(mat,a->sMvctx);
2127 
2128   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2129   PetscLogObjectParent(mat,a->A);
2130   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2131   PetscLogObjectParent(mat,a->B);
2132   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2133   *newmat = mat;
2134   PetscFunctionReturn(0);
2135 }
2136 
2137 #include "petscsys.h"
2138 
2139 #undef __FUNCT__
2140 #define __FUNCT__ "MatLoad_MPISBAIJ"
2141 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2142 {
2143   Mat          A;
2144   int          i,nz,ierr,j,rstart,rend,fd;
2145   PetscScalar  *vals,*buf;
2146   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2147   MPI_Status   status;
2148   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2149   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2150   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2151   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2152   int          dcount,kmax,k,nzcount,tmp;
2153 
2154   PetscFunctionBegin;
2155   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2156 
2157   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2158   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2159   if (!rank) {
2160     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2161     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2162     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2163     if (header[3] < 0) {
2164       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2165     }
2166   }
2167 
2168   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2169   M = header[1]; N = header[2];
2170 
2171   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2172 
2173   /*
2174      This code adds extra rows to make sure the number of rows is
2175      divisible by the blocksize
2176   */
2177   Mbs        = M/bs;
2178   extra_rows = bs - M + bs*(Mbs);
2179   if (extra_rows == bs) extra_rows = 0;
2180   else                  Mbs++;
2181   if (extra_rows &&!rank) {
2182     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2183   }
2184 
2185   /* determine ownership of all rows */
2186   mbs        = Mbs/size + ((Mbs % size) > rank);
2187   m          = mbs*bs;
2188   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2189   browners   = rowners + size + 1;
2190   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2191   rowners[0] = 0;
2192   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2193   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2194   rstart = rowners[rank];
2195   rend   = rowners[rank+1];
2196 
2197   /* distribute row lengths to all processors */
2198   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2199   if (!rank) {
2200     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2201     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2202     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2203     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2204     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2205     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2206     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2207   } else {
2208     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2209   }
2210 
2211   if (!rank) {   /* procs[0] */
2212     /* calculate the number of nonzeros on each processor */
2213     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2214     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2215     for (i=0; i<size; i++) {
2216       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2217         procsnz[i] += rowlengths[j];
2218       }
2219     }
2220     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2221 
2222     /* determine max buffer needed and allocate it */
2223     maxnz = 0;
2224     for (i=0; i<size; i++) {
2225       maxnz = PetscMax(maxnz,procsnz[i]);
2226     }
2227     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2228 
2229     /* read in my part of the matrix column indices  */
2230     nz     = procsnz[0];
2231     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2232     mycols = ibuf;
2233     if (size == 1)  nz -= extra_rows;
2234     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2235     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2236 
2237     /* read in every ones (except the last) and ship off */
2238     for (i=1; i<size-1; i++) {
2239       nz   = procsnz[i];
2240       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2241       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2242     }
2243     /* read in the stuff for the last proc */
2244     if (size != 1) {
2245       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2246       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2247       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2248       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2249     }
2250     ierr = PetscFree(cols);CHKERRQ(ierr);
2251   } else {  /* procs[i], i>0 */
2252     /* determine buffer space needed for message */
2253     nz = 0;
2254     for (i=0; i<m; i++) {
2255       nz += locrowlens[i];
2256     }
2257     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2258     mycols = ibuf;
2259     /* receive message of column indices*/
2260     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2261     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2262     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2263   }
2264 
2265   /* loop over local rows, determining number of off diagonal entries */
2266   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2267   odlens   = dlens + (rend-rstart);
2268   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2269   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2270   masked1  = mask    + Mbs;
2271   masked2  = masked1 + Mbs;
2272   rowcount = 0; nzcount = 0;
2273   for (i=0; i<mbs; i++) {
2274     dcount  = 0;
2275     odcount = 0;
2276     for (j=0; j<bs; j++) {
2277       kmax = locrowlens[rowcount];
2278       for (k=0; k<kmax; k++) {
2279         tmp = mycols[nzcount++]/bs; /* block col. index */
2280         if (!mask[tmp]) {
2281           mask[tmp] = 1;
2282           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2283           else masked1[dcount++] = tmp; /* entry in diag portion */
2284         }
2285       }
2286       rowcount++;
2287     }
2288 
2289     dlens[i]  = dcount;  /* d_nzz[i] */
2290     odlens[i] = odcount; /* o_nzz[i] */
2291 
2292     /* zero out the mask elements we set */
2293     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2294     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2295   }
2296 
2297   /* create our matrix */
2298   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2299   ierr = MatSetType(A,type);CHKERRQ(ierr);
2300   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2301   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2302 
2303   if (!rank) {
2304     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2305     /* read in my part of the matrix numerical values  */
2306     nz = procsnz[0];
2307     vals = buf;
2308     mycols = ibuf;
2309     if (size == 1)  nz -= extra_rows;
2310     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2311     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2312 
2313     /* insert into matrix */
2314     jj      = rstart*bs;
2315     for (i=0; i<m; i++) {
2316       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2317       mycols += locrowlens[i];
2318       vals   += locrowlens[i];
2319       jj++;
2320     }
2321 
2322     /* read in other processors (except the last one) and ship out */
2323     for (i=1; i<size-1; i++) {
2324       nz   = procsnz[i];
2325       vals = buf;
2326       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2327       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2328     }
2329     /* the last proc */
2330     if (size != 1){
2331       nz   = procsnz[i] - extra_rows;
2332       vals = buf;
2333       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2334       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2335       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2336     }
2337     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2338 
2339   } else {
2340     /* receive numeric values */
2341     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2342 
2343     /* receive message of values*/
2344     vals   = buf;
2345     mycols = ibuf;
2346     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2347     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2348     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2349 
2350     /* insert into matrix */
2351     jj      = rstart*bs;
2352     for (i=0; i<m; i++) {
2353       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2354       mycols += locrowlens[i];
2355       vals   += locrowlens[i];
2356       jj++;
2357     }
2358   }
2359 
2360   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2361   ierr = PetscFree(buf);CHKERRQ(ierr);
2362   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2363   ierr = PetscFree(rowners);CHKERRQ(ierr);
2364   ierr = PetscFree(dlens);CHKERRQ(ierr);
2365   ierr = PetscFree(mask);CHKERRQ(ierr);
2366   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2367   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2368   *newmat = A;
2369   PetscFunctionReturn(0);
2370 }
2371 
2372 #undef __FUNCT__
2373 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2374 /*@
2375    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2376 
2377    Input Parameters:
2378 .  mat  - the matrix
2379 .  fact - factor
2380 
2381    Collective on Mat
2382 
2383    Level: advanced
2384 
2385   Notes:
2386    This can also be set by the command line option: -mat_use_hash_table fact
2387 
2388 .keywords: matrix, hashtable, factor, HT
2389 
2390 .seealso: MatSetOption()
2391 @*/
2392 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2393 {
2394   PetscFunctionBegin;
2395   SETERRQ(1,"Function not yet written for SBAIJ format");
2396   /* PetscFunctionReturn(0); */
2397 }
2398 
2399 #undef __FUNCT__
2400 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2401 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2402 {
2403   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2404   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2405   PetscReal    atmp;
2406   PetscReal    *work,*svalues,*rvalues;
2407   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2408   int          rank,size,*rowners_bs,dest,count,source;
2409   PetscScalar  *va;
2410   MatScalar    *ba;
2411   MPI_Status   stat;
2412 
2413   PetscFunctionBegin;
2414   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2415   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2416 
2417   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2418   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2419 
2420   bs   = a->bs;
2421   mbs  = a->mbs;
2422   Mbs  = a->Mbs;
2423   ba   = b->a;
2424   bi   = b->i;
2425   bj   = b->j;
2426   /*
2427   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2428   PetscSynchronizedFlush(A->comm);
2429   */
2430 
2431   /* find ownerships */
2432   rowners_bs = a->rowners_bs;
2433   /*
2434   if (!rank){
2435     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2436   }
2437   */
2438 
2439   /* each proc creates an array to be distributed */
2440   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2441   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2442 
2443   /* row_max for B */
2444   if (rank != size-1){
2445     for (i=0; i<mbs; i++) {
2446       ncols = bi[1] - bi[0]; bi++;
2447       brow  = bs*i;
2448       for (j=0; j<ncols; j++){
2449         bcol = bs*(*bj);
2450         for (kcol=0; kcol<bs; kcol++){
2451           col = bcol + kcol;                 /* local col index */
2452           col += rowners_bs[rank+1];      /* global col index */
2453           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2454           for (krow=0; krow<bs; krow++){
2455             atmp = PetscAbsScalar(*ba); ba++;
2456             row = brow + krow;    /* local row index */
2457             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2458             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2459             if (work[col] < atmp) work[col] = atmp;
2460           }
2461         }
2462         bj++;
2463       }
2464     }
2465     /*
2466       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2467       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2468       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2469       */
2470 
2471     /* send values to its owners */
2472     for (dest=rank+1; dest<size; dest++){
2473       svalues = work + rowners_bs[dest];
2474       count   = rowners_bs[dest+1]-rowners_bs[dest];
2475       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2476       /*
2477       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2478       PetscSynchronizedFlush(A->comm);
2479       */
2480     }
2481   }
2482 
2483   /* receive values */
2484   if (rank){
2485     rvalues = work;
2486     count   = rowners_bs[rank+1]-rowners_bs[rank];
2487     for (source=0; source<rank; source++){
2488       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2489       /* process values */
2490       for (i=0; i<count; i++){
2491         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2492       }
2493       /*
2494       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2495       PetscSynchronizedFlush(A->comm);
2496       */
2497     }
2498   }
2499 
2500   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2501   ierr = PetscFree(work);CHKERRQ(ierr);
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatRelax_MPISBAIJ"
2507 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2508 {
2509   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2510   int            ierr,mbs=mat->mbs,bs=mat->bs;
2511   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2512   Vec            bb1;
2513 
2514   PetscFunctionBegin;
2515   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2516   if (bs > 1)
2517     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2518 
2519   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2520     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2521       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2522       its--;
2523     }
2524 
2525     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2526     while (its--){
2527 
2528       /* lower triangular part: slvec0b = - B^T*xx */
2529       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2530 
2531       /* copy xx into slvec0a */
2532       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2533       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2534       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2535       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2536 
2537       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2538 
2539       /* copy bb into slvec1a */
2540       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2541       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2542       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2543       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2544 
2545       /* set slvec1b = 0 */
2546       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2547 
2548       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2549       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2550       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2551       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2552 
2553       /* upper triangular part: bb1 = bb1 - B*x */
2554       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2555 
2556       /* local diagonal sweep */
2557       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2558     }
2559     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2560   } else {
2561     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2562   }
2563   PetscFunctionReturn(0);
2564 }
2565 
2566 #undef __FUNCT__
2567 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2568 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2569 {
2570   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2571   int            ierr;
2572   PetscScalar    mone=-1.0;
2573   Vec            lvec1,bb1;
2574 
2575   PetscFunctionBegin;
2576   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2577   if (mat->bs > 1)
2578     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2579 
2580   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2581     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2582       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2583       its--;
2584     }
2585 
2586     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2587     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2588     while (its--){
2589       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2590 
2591       /* lower diagonal part: bb1 = bb - B^T*xx */
2592       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2593       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2594 
2595       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2596       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2597       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2598 
2599       /* upper diagonal part: bb1 = bb1 - B*x */
2600       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2601       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2602 
2603       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2604 
2605       /* diagonal sweep */
2606       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2607     }
2608     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2609     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2610   } else {
2611     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2612   }
2613   PetscFunctionReturn(0);
2614 }
2615 
2616