xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 9c097c7127bb68c85bf5f7a436f8a4b636fd1b64)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 extern int MatSetUpMultiply_MPISBAIJ(Mat);
8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
11 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatPrintHelp_SeqSBAIJ(Mat);
18 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
21 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
26    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
27    converts the entries into single precision and then calls ..._MatScalar() to put them
28    into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 int MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
50   int          ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 int MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
65   int          ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (int*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (int*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
237   int          ierr,i,N = m*n;
238   MatScalar    *vsingle;
239 
240   PetscFunctionBegin;
241   if (N > b->setvalueslen) {
242     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
243     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
244     b->setvalueslen  = N;
245   }
246   vsingle = b->setvaluescopy;
247 
248   for (i=0; i<N; i++) {
249     vsingle[i] = v[i];
250   }
251   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
252   PetscFunctionReturn(0);
253 }
254 
255 #undef __FUNCT__
256 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
257 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
258 {
259   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
260   int         ierr,i,N = m*n*b->bs2;
261   MatScalar   *vsingle;
262 
263   PetscFunctionBegin;
264   if (N > b->setvalueslen) {
265     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
266     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
267     b->setvalueslen  = N;
268   }
269   vsingle = b->setvaluescopy;
270   for (i=0; i<N; i++) {
271     vsingle[i] = v[i];
272   }
273   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
274   PetscFunctionReturn(0);
275 }
276 
277 #undef __FUNCT__
278 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
279 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
280 {
281   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
282   int         ierr,i,N = m*n;
283   MatScalar   *vsingle;
284 
285   PetscFunctionBegin;
286   SETERRQ(1,"Function not yet written for SBAIJ format");
287   /* PetscFunctionReturn(0); */
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
292 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
293 {
294   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
295   int         ierr,i,N = m*n*b->bs2;
296   MatScalar   *vsingle;
297 
298   PetscFunctionBegin;
299   SETERRQ(1,"Function not yet written for SBAIJ format");
300   /* PetscFunctionReturn(0); */
301 }
302 #endif
303 
304 /* Only add/insert a(i,j) with i<=j (blocks).
305    Any a(i,j) with i>j input by user is ingored.
306 */
307 #undef __FUNCT__
308 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
309 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
310 {
311   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
312   MatScalar    value;
313   PetscTruth   roworiented = baij->roworiented;
314   int          ierr,i,j,row,col;
315   int          rstart_orig=baij->rstart_bs;
316   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
317   int          cend_orig=baij->cend_bs,bs=baij->bs;
318 
319   /* Some Variables required in the macro */
320   Mat          A = baij->A;
321   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
322   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
323   MatScalar    *aa=a->a;
324 
325   Mat          B = baij->B;
326   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
327   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
328   MatScalar    *ba=b->a;
329 
330   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
331   int          low,high,t,ridx,cidx,bs2=a->bs2;
332   MatScalar    *ap,*bap;
333 
334   /* for stash */
335   int          n_loc, *in_loc=0;
336   MatScalar    *v_loc=0;
337 
338   PetscFunctionBegin;
339 
340   if(!baij->donotstash){
341     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
342     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
343   }
344 
345   for (i=0; i<m; i++) {
346     if (im[i] < 0) continue;
347 #if defined(PETSC_USE_BOPT_g)
348     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
349 #endif
350     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
351       row = im[i] - rstart_orig;              /* local row index */
352       for (j=0; j<n; j++) {
353         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
354         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
355           col = in[j] - cstart_orig;          /* local col index */
356           brow = row/bs; bcol = col/bs;
357           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
358           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
359           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
360           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
361         } else if (in[j] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
364 #endif
365         else {  /* off-diag entry (B) */
366           if (mat->was_assembled) {
367             if (!baij->colmap) {
368               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
369             }
370 #if defined (PETSC_USE_CTABLE)
371             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
372             col  = col - 1;
373 #else
374             col = baij->colmap[in[j]/bs] - 1;
375 #endif
376             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
377               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
378               col =  in[j];
379               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
380               B = baij->B;
381               b = (Mat_SeqBAIJ*)(B)->data;
382               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
383               ba=b->a;
384             } else col += in[j]%bs;
385           } else col = in[j];
386           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
387           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
388           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
389         }
390       }
391     } else {  /* off processor entry */
392       if (!baij->donotstash) {
393         n_loc = 0;
394         for (j=0; j<n; j++){
395           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
396           in_loc[n_loc] = in[j];
397           if (roworiented) {
398             v_loc[n_loc] = v[i*n+j];
399           } else {
400             v_loc[n_loc] = v[j*m+i];
401           }
402           n_loc++;
403         }
404         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
405       }
406     }
407   }
408 
409   if(!baij->donotstash){
410     ierr = PetscFree(in_loc);CHKERRQ(ierr);
411     ierr = PetscFree(v_loc);CHKERRQ(ierr);
412   }
413   PetscFunctionReturn(0);
414 }
415 
416 #undef __FUNCT__
417 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
418 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
419 {
420   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
421   const MatScalar *value;
422   MatScalar       *barray=baij->barray;
423   PetscTruth      roworiented = baij->roworiented;
424   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
425   int             rend=baij->rend,cstart=baij->cstart,stepval;
426   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
427 
428   PetscFunctionBegin;
429   if(!barray) {
430     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
431     baij->barray = barray;
432   }
433 
434   if (roworiented) {
435     stepval = (n-1)*bs;
436   } else {
437     stepval = (m-1)*bs;
438   }
439   for (i=0; i<m; i++) {
440     if (im[i] < 0) continue;
441 #if defined(PETSC_USE_BOPT_g)
442     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
443 #endif
444     if (im[i] >= rstart && im[i] < rend) {
445       row = im[i] - rstart;
446       for (j=0; j<n; j++) {
447         /* If NumCol = 1 then a copy is not required */
448         if ((roworiented) && (n == 1)) {
449           barray = (MatScalar*) v + i*bs2;
450         } else if((!roworiented) && (m == 1)) {
451           barray = (MatScalar*) v + j*bs2;
452         } else { /* Here a copy is required */
453           if (roworiented) {
454             value = v + i*(stepval+bs)*bs + j*bs;
455           } else {
456             value = v + j*(stepval+bs)*bs + i*bs;
457           }
458           for (ii=0; ii<bs; ii++,value+=stepval) {
459             for (jj=0; jj<bs; jj++) {
460               *barray++  = *value++;
461             }
462           }
463           barray -=bs2;
464         }
465 
466         if (in[j] >= cstart && in[j] < cend){
467           col  = in[j] - cstart;
468           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
469         }
470         else if (in[j] < 0) continue;
471 #if defined(PETSC_USE_BOPT_g)
472         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
473 #endif
474         else {
475           if (mat->was_assembled) {
476             if (!baij->colmap) {
477               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
478             }
479 
480 #if defined(PETSC_USE_BOPT_g)
481 #if defined (PETSC_USE_CTABLE)
482             { int data;
483               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
484               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
485             }
486 #else
487             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
488 #endif
489 #endif
490 #if defined (PETSC_USE_CTABLE)
491 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
492             col  = (col - 1)/bs;
493 #else
494             col = (baij->colmap[in[j]] - 1)/bs;
495 #endif
496             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
497               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
498               col =  in[j];
499             }
500           }
501           else col = in[j];
502           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
503         }
504       }
505     } else {
506       if (!baij->donotstash) {
507         if (roworiented) {
508           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
509         } else {
510           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
511         }
512       }
513     }
514   }
515   PetscFunctionReturn(0);
516 }
517 
518 #define HASH_KEY 0.6180339887
519 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
520 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
521 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 #undef __FUNCT__
523 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
524 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
525 {
526   PetscFunctionBegin;
527   SETERRQ(1,"Function not yet written for SBAIJ format");
528   /* PetscFunctionReturn(0); */
529 }
530 
531 #undef __FUNCT__
532 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
533 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
534 {
535   PetscFunctionBegin;
536   SETERRQ(1,"Function not yet written for SBAIJ format");
537   /* PetscFunctionReturn(0); */
538 }
539 
540 #undef __FUNCT__
541 #define __FUNCT__ "MatGetValues_MPISBAIJ"
542 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
543 {
544   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
545   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
546   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
547 
548   PetscFunctionBegin;
549   for (i=0; i<m; i++) {
550     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
551     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
552     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
553       row = idxm[i] - bsrstart;
554       for (j=0; j<n; j++) {
555         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
556         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
557         if (idxn[j] >= bscstart && idxn[j] < bscend){
558           col = idxn[j] - bscstart;
559           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
560         } else {
561           if (!baij->colmap) {
562             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
563           }
564 #if defined (PETSC_USE_CTABLE)
565           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
566           data --;
567 #else
568           data = baij->colmap[idxn[j]/bs]-1;
569 #endif
570           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
571           else {
572             col  = data + idxn[j]%bs;
573             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
574           }
575         }
576       }
577     } else {
578       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
579     }
580   }
581  PetscFunctionReturn(0);
582 }
583 
584 #undef __FUNCT__
585 #define __FUNCT__ "MatNorm_MPISBAIJ"
586 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
587 {
588   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
589   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
590   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
591   int        ierr;
592   PetscReal  sum[2],*lnorm2;
593 
594   PetscFunctionBegin;
595   if (baij->size == 1) {
596     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
597   } else {
598     if (type == NORM_FROBENIUS) {
599       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
600       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
601       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
602       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
603       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
604       /*
605       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
606       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
607       */
608       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
609       /*
610       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
611       PetscSynchronizedFlush(mat->comm); */
612 
613       *norm = sqrt(sum[0] + 2*sum[1]);
614       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
615     } else {
616       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
617     }
618   }
619   PetscFunctionReturn(0);
620 }
621 
622 /*
623   Creates the hash table, and sets the table
624   This table is created only once.
625   If new entried need to be added to the matrix
626   then the hash table has to be destroyed and
627   recreated.
628 */
629 #undef __FUNCT__
630 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
631 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
632 {
633   PetscFunctionBegin;
634   SETERRQ(1,"Function not yet written for SBAIJ format");
635   /* PetscFunctionReturn(0); */
636 }
637 
638 #undef __FUNCT__
639 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
640 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
641 {
642   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
643   int         ierr,nstash,reallocs;
644   InsertMode  addv;
645 
646   PetscFunctionBegin;
647   if (baij->donotstash) {
648     PetscFunctionReturn(0);
649   }
650 
651   /* make sure all processors are either in INSERTMODE or ADDMODE */
652   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
653   if (addv == (ADD_VALUES|INSERT_VALUES)) {
654     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
655   }
656   mat->insertmode = addv; /* in case this processor had no cache */
657 
658   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
659   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
660   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
661   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
662   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
663   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
664   PetscFunctionReturn(0);
665 }
666 
667 #undef __FUNCT__
668 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
669 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
670 {
671   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
672   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
673   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
674   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
675   int         *row,*col,other_disassembled;
676   PetscTruth  r1,r2,r3;
677   MatScalar   *val;
678   InsertMode  addv = mat->insertmode;
679 
680   PetscFunctionBegin;
681 
682   if (!baij->donotstash) {
683     while (1) {
684       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
685       /*
686       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
687       PetscSynchronizedFlush(mat->comm);
688       */
689       if (!flg) break;
690 
691       for (i=0; i<n;) {
692         /* Now identify the consecutive vals belonging to the same row */
693         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
694         if (j < n) ncols = j-i;
695         else       ncols = n-i;
696         /* Now assemble all these values with a single function call */
697         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
698         i = j;
699       }
700     }
701     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
702     /* Now process the block-stash. Since the values are stashed column-oriented,
703        set the roworiented flag to column oriented, and after MatSetValues()
704        restore the original flags */
705     r1 = baij->roworiented;
706     r2 = a->roworiented;
707     r3 = b->roworiented;
708     baij->roworiented = PETSC_FALSE;
709     a->roworiented    = PETSC_FALSE;
710     b->roworiented    = PETSC_FALSE;
711     while (1) {
712       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
713       if (!flg) break;
714 
715       for (i=0; i<n;) {
716         /* Now identify the consecutive vals belonging to the same row */
717         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
718         if (j < n) ncols = j-i;
719         else       ncols = n-i;
720         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
721         i = j;
722       }
723     }
724     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
725     baij->roworiented = r1;
726     a->roworiented    = r2;
727     b->roworiented    = r3;
728   }
729 
730   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
731   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
732 
733   /* determine if any processor has disassembled, if so we must
734      also disassemble ourselfs, in order that we may reassemble. */
735   /*
736      if nonzero structure of submatrix B cannot change then we know that
737      no processor disassembled thus we can skip this stuff
738   */
739   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
740     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
741     if (mat->was_assembled && !other_disassembled) {
742       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
743     }
744   }
745 
746   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
747     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
748   }
749   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
750   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
751 
752 #if defined(PETSC_USE_BOPT_g)
753   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
754     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
755     baij->ht_total_ct  = 0;
756     baij->ht_insert_ct = 0;
757   }
758 #endif
759   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
760     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
761     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
762     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
763   }
764 
765   if (baij->rowvalues) {
766     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
767     baij->rowvalues = 0;
768   }
769 
770   PetscFunctionReturn(0);
771 }
772 
773 #undef __FUNCT__
774 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
775 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
776 {
777   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
778   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
779   PetscTruth        isascii,isdraw;
780   PetscViewer       sviewer;
781   PetscViewerFormat format;
782 
783   PetscFunctionBegin;
784   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
786   if (isascii) {
787     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
788     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
789       MatInfo info;
790       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
791       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
792       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
793               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
794               baij->bs,(int)info.memory);CHKERRQ(ierr);
795       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
796       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
797       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
798       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
799       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
800       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
801       PetscFunctionReturn(0);
802     } else if (format == PETSC_VIEWER_ASCII_INFO) {
803       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
804       PetscFunctionReturn(0);
805     }
806   }
807 
808   if (isdraw) {
809     PetscDraw       draw;
810     PetscTruth isnull;
811     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
812     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
813   }
814 
815   if (size == 1) {
816     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
817     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
818   } else {
819     /* assemble the entire matrix onto first processor. */
820     Mat         A;
821     Mat_SeqSBAIJ *Aloc;
822     Mat_SeqBAIJ *Bloc;
823     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
824     MatScalar   *a;
825 
826     if (!rank) {
827       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
828     } else {
829       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
830     }
831     PetscLogObjectParent(mat,A);
832 
833     /* copy over the A part */
834     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
835     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
836     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
837 
838     for (i=0; i<mbs; i++) {
839       rvals[0] = bs*(baij->rstart + i);
840       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
841       for (j=ai[i]; j<ai[i+1]; j++) {
842         col = (baij->cstart+aj[j])*bs;
843         for (k=0; k<bs; k++) {
844           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
845           col++; a += bs;
846         }
847       }
848     }
849     /* copy over the B part */
850     Bloc = (Mat_SeqBAIJ*)baij->B->data;
851     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
852     for (i=0; i<mbs; i++) {
853       rvals[0] = bs*(baij->rstart + i);
854       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
855       for (j=ai[i]; j<ai[i+1]; j++) {
856         col = baij->garray[aj[j]]*bs;
857         for (k=0; k<bs; k++) {
858           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
859           col++; a += bs;
860         }
861       }
862     }
863     ierr = PetscFree(rvals);CHKERRQ(ierr);
864     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
865     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866     /*
867        Everyone has to call to draw the matrix since the graphics waits are
868        synchronized across all processors that share the PetscDraw object
869     */
870     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
871     if (!rank) {
872       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
873       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
874     }
875     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
876     ierr = MatDestroy(A);CHKERRQ(ierr);
877   }
878   PetscFunctionReturn(0);
879 }
880 
881 #undef __FUNCT__
882 #define __FUNCT__ "MatView_MPISBAIJ"
883 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
884 {
885   int        ierr;
886   PetscTruth isascii,isdraw,issocket,isbinary;
887 
888   PetscFunctionBegin;
889   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
893   if (isascii || isdraw || issocket || isbinary) {
894     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
895   } else {
896     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
897   }
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatDestroy_MPISBAIJ"
903 int MatDestroy_MPISBAIJ(Mat mat)
904 {
905   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
906   int         ierr;
907 
908   PetscFunctionBegin;
909 #if defined(PETSC_USE_LOG)
910   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
911 #endif
912   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
913   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
914   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
915   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
916   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
917 #if defined (PETSC_USE_CTABLE)
918   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
919 #else
920   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
921 #endif
922   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
923   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
924   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
925   if (baij->slvec0) {
926     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
927     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
928   }
929   if (baij->slvec1) {
930     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
931     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
932     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
933   }
934   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
935   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
936   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
937   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
938 #if defined(PETSC_USE_MAT_SINGLE)
939   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
940 #endif
941   ierr = PetscFree(baij);CHKERRQ(ierr);
942   PetscFunctionReturn(0);
943 }
944 
945 #undef __FUNCT__
946 #define __FUNCT__ "MatMult_MPISBAIJ"
947 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
948 {
949   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
950   int         ierr,nt,mbs=a->mbs,bs=a->bs;
951   PetscScalar *x,*from,zero=0.0;
952 
953   PetscFunctionBegin;
954   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
955   if (nt != A->n) {
956     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
957   }
958   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
959   if (nt != A->m) {
960     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
961   }
962 
963   /* diagonal part */
964   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
965   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
966 
967   /* subdiagonal part */
968   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
969 
970   /* copy x into the vec slvec0 */
971   ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr);
972   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
973   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
974   ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr);
975 
976   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
977   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
978   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
979 
980   /* supperdiagonal part */
981   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
982 
983   PetscFunctionReturn(0);
984 }
985 
986 #undef __FUNCT__
987 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
988 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
989 {
990   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
991   int         ierr,nt;
992 
993   PetscFunctionBegin;
994   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
995   if (nt != A->n) {
996     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
997   }
998   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
999   if (nt != A->m) {
1000     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1001   }
1002 
1003   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1004   /* do diagonal part */
1005   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1006   /* do supperdiagonal part */
1007   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1008   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1009   /* do subdiagonal part */
1010   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1011   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1012   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1013 
1014   PetscFunctionReturn(0);
1015 }
1016 
1017 #undef __FUNCT__
1018 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1019 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1020 {
1021   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1022   int          ierr,mbs=a->mbs,bs=a->bs;
1023   PetscScalar  *x,*from,zero=0.0;
1024 
1025   PetscFunctionBegin;
1026   /*
1027   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1028   PetscSynchronizedFlush(A->comm);
1029   */
1030   /* diagonal part */
1031   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1032   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1033 
1034   /* subdiagonal part */
1035   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1036 
1037   /* copy x into the vec slvec0 */
1038   ierr = VecGetArrayFast(a->slvec0,&from);CHKERRQ(ierr);
1039   ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
1040   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1041   ierr = VecRestoreArrayFast(a->slvec0,&from);CHKERRQ(ierr);
1042 
1043   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1044   ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
1045   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1046 
1047   /* supperdiagonal part */
1048   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1049 
1050   PetscFunctionReturn(0);
1051 }
1052 
1053 #undef __FUNCT__
1054 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1055 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1056 {
1057   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1058   int        ierr;
1059 
1060   PetscFunctionBegin;
1061   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1062   /* do diagonal part */
1063   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1064   /* do supperdiagonal part */
1065   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1066   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1067 
1068   /* do subdiagonal part */
1069   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1070   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1071   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1072 
1073   PetscFunctionReturn(0);
1074 }
1075 
1076 #undef __FUNCT__
1077 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1078 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1079 {
1080   int ierr;
1081 
1082   PetscFunctionBegin;
1083   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1084   PetscFunctionReturn(0);
1085 }
1086 
1087 #undef __FUNCT__
1088 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1089 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1090 {
1091   int ierr;
1092 
1093   PetscFunctionBegin;
1094   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 /*
1099   This only works correctly for square matrices where the subblock A->A is the
1100    diagonal block
1101 */
1102 #undef __FUNCT__
1103 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1104 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1105 {
1106   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1107   int         ierr;
1108 
1109   PetscFunctionBegin;
1110   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1111   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1112   PetscFunctionReturn(0);
1113 }
1114 
1115 #undef __FUNCT__
1116 #define __FUNCT__ "MatScale_MPISBAIJ"
1117 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1118 {
1119   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1120   int         ierr;
1121 
1122   PetscFunctionBegin;
1123   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1124   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1125   PetscFunctionReturn(0);
1126 }
1127 
1128 #undef __FUNCT__
1129 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1130 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1131 {
1132   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1133   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1134   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1135   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1136   int            *cmap,*idx_p,cstart = mat->cstart;
1137 
1138   PetscFunctionBegin;
1139   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1140   mat->getrowactive = PETSC_TRUE;
1141 
1142   if (!mat->rowvalues && (idx || v)) {
1143     /*
1144         allocate enough space to hold information from the longest row.
1145     */
1146     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1147     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1148     int     max = 1,mbs = mat->mbs,tmp;
1149     for (i=0; i<mbs; i++) {
1150       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1151       if (max < tmp) { max = tmp; }
1152     }
1153     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1154     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1155   }
1156 
1157   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1158   lrow = row - brstart;  /* local row index */
1159 
1160   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1161   if (!v)   {pvA = 0; pvB = 0;}
1162   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1163   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1164   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1165   nztot = nzA + nzB;
1166 
1167   cmap  = mat->garray;
1168   if (v  || idx) {
1169     if (nztot) {
1170       /* Sort by increasing column numbers, assuming A and B already sorted */
1171       int imark = -1;
1172       if (v) {
1173         *v = v_p = mat->rowvalues;
1174         for (i=0; i<nzB; i++) {
1175           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1176           else break;
1177         }
1178         imark = i;
1179         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1180         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1181       }
1182       if (idx) {
1183         *idx = idx_p = mat->rowindices;
1184         if (imark > -1) {
1185           for (i=0; i<imark; i++) {
1186             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1187           }
1188         } else {
1189           for (i=0; i<nzB; i++) {
1190             if (cmap[cworkB[i]/bs] < cstart)
1191               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1192             else break;
1193           }
1194           imark = i;
1195         }
1196         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1197         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1198       }
1199     } else {
1200       if (idx) *idx = 0;
1201       if (v)   *v   = 0;
1202     }
1203   }
1204   *nz = nztot;
1205   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1206   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1207   PetscFunctionReturn(0);
1208 }
1209 
1210 #undef __FUNCT__
1211 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1212 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1213 {
1214   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1215 
1216   PetscFunctionBegin;
1217   if (baij->getrowactive == PETSC_FALSE) {
1218     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1219   }
1220   baij->getrowactive = PETSC_FALSE;
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1226 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1227 {
1228   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1229 
1230   PetscFunctionBegin;
1231   *bs = baij->bs;
1232   PetscFunctionReturn(0);
1233 }
1234 
1235 #undef __FUNCT__
1236 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1237 int MatZeroEntries_MPISBAIJ(Mat A)
1238 {
1239   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1240   int         ierr;
1241 
1242   PetscFunctionBegin;
1243   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1244   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1245   PetscFunctionReturn(0);
1246 }
1247 
1248 #undef __FUNCT__
1249 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1250 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1251 {
1252   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1253   Mat         A = a->A,B = a->B;
1254   int         ierr;
1255   PetscReal   isend[5],irecv[5];
1256 
1257   PetscFunctionBegin;
1258   info->block_size     = (PetscReal)a->bs;
1259   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1260   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1261   isend[3] = info->memory;  isend[4] = info->mallocs;
1262   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1263   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1264   isend[3] += info->memory;  isend[4] += info->mallocs;
1265   if (flag == MAT_LOCAL) {
1266     info->nz_used      = isend[0];
1267     info->nz_allocated = isend[1];
1268     info->nz_unneeded  = isend[2];
1269     info->memory       = isend[3];
1270     info->mallocs      = isend[4];
1271   } else if (flag == MAT_GLOBAL_MAX) {
1272     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1273     info->nz_used      = irecv[0];
1274     info->nz_allocated = irecv[1];
1275     info->nz_unneeded  = irecv[2];
1276     info->memory       = irecv[3];
1277     info->mallocs      = irecv[4];
1278   } else if (flag == MAT_GLOBAL_SUM) {
1279     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1280     info->nz_used      = irecv[0];
1281     info->nz_allocated = irecv[1];
1282     info->nz_unneeded  = irecv[2];
1283     info->memory       = irecv[3];
1284     info->mallocs      = irecv[4];
1285   } else {
1286     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1287   }
1288   info->rows_global       = (PetscReal)A->M;
1289   info->columns_global    = (PetscReal)A->N;
1290   info->rows_local        = (PetscReal)A->m;
1291   info->columns_local     = (PetscReal)A->N;
1292   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1293   info->fill_ratio_needed = 0;
1294   info->factor_mallocs    = 0;
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1300 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1301 {
1302   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1303   int         ierr;
1304 
1305   PetscFunctionBegin;
1306   switch (op) {
1307   case MAT_NO_NEW_NONZERO_LOCATIONS:
1308   case MAT_YES_NEW_NONZERO_LOCATIONS:
1309   case MAT_COLUMNS_UNSORTED:
1310   case MAT_COLUMNS_SORTED:
1311   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1312   case MAT_KEEP_ZEROED_ROWS:
1313   case MAT_NEW_NONZERO_LOCATION_ERR:
1314     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1315     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1316     break;
1317   case MAT_ROW_ORIENTED:
1318     a->roworiented = PETSC_TRUE;
1319     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1320     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1321     break;
1322   case MAT_ROWS_SORTED:
1323   case MAT_ROWS_UNSORTED:
1324   case MAT_YES_NEW_DIAGONALS:
1325     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1326     break;
1327   case MAT_COLUMN_ORIENTED:
1328     a->roworiented = PETSC_FALSE;
1329     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1330     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1331     break;
1332   case MAT_IGNORE_OFF_PROC_ENTRIES:
1333     a->donotstash = PETSC_TRUE;
1334     break;
1335   case MAT_NO_NEW_DIAGONALS:
1336     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1337   case MAT_USE_HASH_TABLE:
1338     a->ht_flag = PETSC_TRUE;
1339     break;
1340   case MAT_NOT_SYMMETRIC:
1341   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1342   case MAT_HERMITIAN:
1343     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1344   case MAT_SYMMETRIC:
1345   case MAT_STRUCTURALLY_SYMMETRIC:
1346   case MAT_NOT_HERMITIAN:
1347   case MAT_SYMMETRY_ETERNAL:
1348   case MAT_NOT_SYMMETRY_ETERNAL:
1349     break;
1350   default:
1351     SETERRQ(PETSC_ERR_SUP,"unknown option");
1352   }
1353   PetscFunctionReturn(0);
1354 }
1355 
1356 #undef __FUNCT__
1357 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1358 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1359 {
1360   int ierr;
1361   PetscFunctionBegin;
1362   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1368 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1369 {
1370   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1371   Mat         a = baij->A,b = baij->B;
1372   int         ierr,s1,s2,s3;
1373 
1374   PetscFunctionBegin;
1375   if (ll != rr) {
1376     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1377   }
1378   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1379   if (rr) {
1380     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1381     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1382     /* Overlap communication with computation. */
1383     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1384     /*} if (ll) { */
1385     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1386     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1387     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1388     /* } */
1389   /* scale  the diagonal block */
1390   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1391 
1392   /* if (rr) { */
1393     /* Do a scatter end and then right scale the off-diagonal block */
1394     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1395     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1396   }
1397 
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1403 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1404 {
1405   PetscFunctionBegin;
1406   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1407 }
1408 
1409 #undef __FUNCT__
1410 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1411 int MatPrintHelp_MPISBAIJ(Mat A)
1412 {
1413   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1414   MPI_Comm    comm = A->comm;
1415   static int  called = 0;
1416   int         ierr;
1417 
1418   PetscFunctionBegin;
1419   if (!a->rank) {
1420     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1421   }
1422   if (called) {PetscFunctionReturn(0);} else called = 1;
1423   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1424   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1425   PetscFunctionReturn(0);
1426 }
1427 
1428 #undef __FUNCT__
1429 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1430 int MatSetUnfactored_MPISBAIJ(Mat A)
1431 {
1432   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1433   int         ierr;
1434 
1435   PetscFunctionBegin;
1436   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1441 
1442 #undef __FUNCT__
1443 #define __FUNCT__ "MatEqual_MPISBAIJ"
1444 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1445 {
1446   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1447   Mat         a,b,c,d;
1448   PetscTruth  flg;
1449   int         ierr;
1450 
1451   PetscFunctionBegin;
1452   a = matA->A; b = matA->B;
1453   c = matB->A; d = matB->B;
1454 
1455   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1456   if (flg == PETSC_TRUE) {
1457     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1458   }
1459   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1465 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1466 {
1467   int        ierr;
1468 
1469   PetscFunctionBegin;
1470   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1471   PetscFunctionReturn(0);
1472 }
1473 
1474 #undef __FUNCT__
1475 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1476 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1477 {
1478   int        i,ierr;
1479   PetscTruth flg;
1480 
1481   for (i=0; i<n; i++) {
1482     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1483     if (!flg) {
1484       SETERRQ(1,"Can only get symmetric submatrix for MPISBAIJ matrices");
1485     }
1486   }
1487   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1488   PetscFunctionReturn(0);
1489 }
1490 
1491 
1492 /* -------------------------------------------------------------------*/
1493 static struct _MatOps MatOps_Values = {
1494        MatSetValues_MPISBAIJ,
1495        MatGetRow_MPISBAIJ,
1496        MatRestoreRow_MPISBAIJ,
1497        MatMult_MPISBAIJ,
1498 /* 4*/ MatMultAdd_MPISBAIJ,
1499        MatMultTranspose_MPISBAIJ,
1500        MatMultTransposeAdd_MPISBAIJ,
1501        0,
1502        0,
1503        0,
1504 /*10*/ 0,
1505        0,
1506        0,
1507        MatRelax_MPISBAIJ,
1508        MatTranspose_MPISBAIJ,
1509 /*15*/ MatGetInfo_MPISBAIJ,
1510        MatEqual_MPISBAIJ,
1511        MatGetDiagonal_MPISBAIJ,
1512        MatDiagonalScale_MPISBAIJ,
1513        MatNorm_MPISBAIJ,
1514 /*20*/ MatAssemblyBegin_MPISBAIJ,
1515        MatAssemblyEnd_MPISBAIJ,
1516        0,
1517        MatSetOption_MPISBAIJ,
1518        MatZeroEntries_MPISBAIJ,
1519 /*25*/ MatZeroRows_MPISBAIJ,
1520        0,
1521        0,
1522        0,
1523        0,
1524 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1525        0,
1526        0,
1527        0,
1528        0,
1529 /*35*/ MatDuplicate_MPISBAIJ,
1530        0,
1531        0,
1532        0,
1533        0,
1534 /*40*/ 0,
1535        MatGetSubMatrices_MPISBAIJ,
1536        0,
1537        MatGetValues_MPISBAIJ,
1538        0,
1539 /*45*/ MatPrintHelp_MPISBAIJ,
1540        MatScale_MPISBAIJ,
1541        0,
1542        0,
1543        0,
1544 /*50*/ MatGetBlockSize_MPISBAIJ,
1545        0,
1546        0,
1547        0,
1548        0,
1549 /*55*/ 0,
1550        0,
1551        MatSetUnfactored_MPISBAIJ,
1552        0,
1553        MatSetValuesBlocked_MPISBAIJ,
1554 /*60*/ 0,
1555        0,
1556        0,
1557        MatGetPetscMaps_Petsc,
1558        0,
1559 /*65*/ 0,
1560        0,
1561        0,
1562        0,
1563        0,
1564 /*70*/ MatGetRowMax_MPISBAIJ,
1565        0,
1566        0,
1567        0,
1568        0,
1569 /*75*/ 0,
1570        0,
1571        0,
1572        0,
1573        0,
1574 /*80*/ 0,
1575        0,
1576        0,
1577        0,
1578 /*85*/ MatLoad_MPISBAIJ
1579 };
1580 
1581 
1582 EXTERN_C_BEGIN
1583 #undef __FUNCT__
1584 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1585 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1586 {
1587   PetscFunctionBegin;
1588   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1589   *iscopy = PETSC_FALSE;
1590   PetscFunctionReturn(0);
1591 }
1592 EXTERN_C_END
1593 
1594 EXTERN_C_BEGIN
1595 #undef __FUNCT__
1596 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1597 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1598 {
1599   Mat_MPISBAIJ *b;
1600   int          ierr,i,mbs,Mbs;
1601 
1602   PetscFunctionBegin;
1603   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1604 
1605   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1606   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1607   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1608   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1609   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1610   if (d_nnz) {
1611     for (i=0; i<B->m/bs; i++) {
1612       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1613     }
1614   }
1615   if (o_nnz) {
1616     for (i=0; i<B->m/bs; i++) {
1617       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1618     }
1619   }
1620   B->preallocated = PETSC_TRUE;
1621   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1622   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1623   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1624   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1625 
1626   b   = (Mat_MPISBAIJ*)B->data;
1627   mbs = B->m/bs;
1628   Mbs = B->M/bs;
1629   if (mbs*bs != B->m) {
1630     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1631   }
1632 
1633   b->bs  = bs;
1634   b->bs2 = bs*bs;
1635   b->mbs = mbs;
1636   b->nbs = mbs;
1637   b->Mbs = Mbs;
1638   b->Nbs = Mbs;
1639 
1640   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1641   b->rowners[0]    = 0;
1642   for (i=2; i<=b->size; i++) {
1643     b->rowners[i] += b->rowners[i-1];
1644   }
1645   b->rstart    = b->rowners[b->rank];
1646   b->rend      = b->rowners[b->rank+1];
1647   b->cstart    = b->rstart;
1648   b->cend      = b->rend;
1649   for (i=0; i<=b->size; i++) {
1650     b->rowners_bs[i] = b->rowners[i]*bs;
1651   }
1652   b->rstart_bs = b-> rstart*bs;
1653   b->rend_bs   = b->rend*bs;
1654 
1655   b->cstart_bs = b->cstart*bs;
1656   b->cend_bs   = b->cend*bs;
1657 
1658   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1659   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1660   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1661   PetscLogObjectParent(B,b->A);
1662 
1663   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1664   ierr = MatSetType(b->B,MATSEQSBAIJ);CHKERRQ(ierr);
1665   ierr = MatSeqSBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1666   PetscLogObjectParent(B,b->B);
1667 
1668   /* build cache for off array entries formed */
1669   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1670 
1671   PetscFunctionReturn(0);
1672 }
1673 EXTERN_C_END
1674 
1675 /*MC
1676    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1677    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1678 
1679    Options Database Keys:
1680 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1681 
1682   Level: beginner
1683 
1684 .seealso: MatCreateMPISBAIJ
1685 M*/
1686 
1687 EXTERN_C_BEGIN
1688 #undef __FUNCT__
1689 #define __FUNCT__ "MatCreate_MPISBAIJ"
1690 int MatCreate_MPISBAIJ(Mat B)
1691 {
1692   Mat_MPISBAIJ *b;
1693   int          ierr;
1694   PetscTruth   flg;
1695 
1696   PetscFunctionBegin;
1697 
1698   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1699   B->data = (void*)b;
1700   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1701   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1702 
1703   B->ops->destroy    = MatDestroy_MPISBAIJ;
1704   B->ops->view       = MatView_MPISBAIJ;
1705   B->mapping    = 0;
1706   B->factor     = 0;
1707   B->assembled  = PETSC_FALSE;
1708 
1709   B->insertmode = NOT_SET_VALUES;
1710   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1711   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1712 
1713   /* build local table of row and column ownerships */
1714   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1715   b->cowners    = b->rowners + b->size + 2;
1716   b->rowners_bs = b->cowners + b->size + 2;
1717   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1718 
1719   /* build cache for off array entries formed */
1720   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1721   b->donotstash  = PETSC_FALSE;
1722   b->colmap      = PETSC_NULL;
1723   b->garray      = PETSC_NULL;
1724   b->roworiented = PETSC_TRUE;
1725 
1726 #if defined(PETSC_USE_MAT_SINGLE)
1727   /* stuff for MatSetValues_XXX in single precision */
1728   b->setvalueslen     = 0;
1729   b->setvaluescopy    = PETSC_NULL;
1730 #endif
1731 
1732   /* stuff used in block assembly */
1733   b->barray       = 0;
1734 
1735   /* stuff used for matrix vector multiply */
1736   b->lvec         = 0;
1737   b->Mvctx        = 0;
1738   b->slvec0       = 0;
1739   b->slvec0b      = 0;
1740   b->slvec1       = 0;
1741   b->slvec1a      = 0;
1742   b->slvec1b      = 0;
1743   b->sMvctx       = 0;
1744 
1745   /* stuff for MatGetRow() */
1746   b->rowindices   = 0;
1747   b->rowvalues    = 0;
1748   b->getrowactive = PETSC_FALSE;
1749 
1750   /* hash table stuff */
1751   b->ht           = 0;
1752   b->hd           = 0;
1753   b->ht_size      = 0;
1754   b->ht_flag      = PETSC_FALSE;
1755   b->ht_fact      = 0;
1756   b->ht_total_ct  = 0;
1757   b->ht_insert_ct = 0;
1758 
1759   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1760   if (flg) {
1761     PetscReal fact = 1.39;
1762     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1763     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1764     if (fact <= 1.0) fact = 1.39;
1765     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1766     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1767   }
1768   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1769                                      "MatStoreValues_MPISBAIJ",
1770                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1771   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1772                                      "MatRetrieveValues_MPISBAIJ",
1773                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1774   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1775                                      "MatGetDiagonalBlock_MPISBAIJ",
1776                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1777   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1778                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1779                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1780   PetscFunctionReturn(0);
1781 }
1782 EXTERN_C_END
1783 
1784 /*MC
1785    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1786 
1787    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1788    and MATMPISBAIJ otherwise.
1789 
1790    Options Database Keys:
1791 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1792 
1793   Level: beginner
1794 
1795 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1796 M*/
1797 
1798 EXTERN_C_BEGIN
1799 #undef __FUNCT__
1800 #define __FUNCT__ "MatCreate_SBAIJ"
1801 int MatCreate_SBAIJ(Mat A) {
1802   int ierr,size;
1803 
1804   PetscFunctionBegin;
1805   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1806   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1807   if (size == 1) {
1808     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1809   } else {
1810     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1811   }
1812   PetscFunctionReturn(0);
1813 }
1814 EXTERN_C_END
1815 
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1818 /*@C
1819    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1820    the user should preallocate the matrix storage by setting the parameters
1821    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1822    performance can be increased by more than a factor of 50.
1823 
1824    Collective on Mat
1825 
1826    Input Parameters:
1827 +  A - the matrix
1828 .  bs   - size of blockk
1829 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1830            submatrix  (same for all local rows)
1831 .  d_nnz - array containing the number of block nonzeros in the various block rows
1832            in the upper triangular and diagonal part of the in diagonal portion of the local
1833            (possibly different for each block row) or PETSC_NULL.  You must leave room
1834            for the diagonal entry even if it is zero.
1835 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1836            submatrix (same for all local rows).
1837 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1838            off-diagonal portion of the local submatrix (possibly different for
1839            each block row) or PETSC_NULL.
1840 
1841 
1842    Options Database Keys:
1843 .   -mat_no_unroll - uses code that does not unroll the loops in the
1844                      block calculations (much slower)
1845 .   -mat_block_size - size of the blocks to use
1846 
1847    Notes:
1848 
1849    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1850    than it must be used on all processors that share the object for that argument.
1851 
1852    Storage Information:
1853    For a square global matrix we define each processor's diagonal portion
1854    to be its local rows and the corresponding columns (a square submatrix);
1855    each processor's off-diagonal portion encompasses the remainder of the
1856    local matrix (a rectangular submatrix).
1857 
1858    The user can specify preallocated storage for the diagonal part of
1859    the local submatrix with either d_nz or d_nnz (not both).  Set
1860    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1861    memory allocation.  Likewise, specify preallocated storage for the
1862    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1863 
1864    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1865    the figure below we depict these three local rows and all columns (0-11).
1866 
1867 .vb
1868            0 1 2 3 4 5 6 7 8 9 10 11
1869           -------------------
1870    row 3  |  o o o d d d o o o o o o
1871    row 4  |  o o o d d d o o o o o o
1872    row 5  |  o o o d d d o o o o o o
1873           -------------------
1874 .ve
1875 
1876    Thus, any entries in the d locations are stored in the d (diagonal)
1877    submatrix, and any entries in the o locations are stored in the
1878    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1879    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1880 
1881    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1882    plus the diagonal part of the d matrix,
1883    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1884    In general, for PDE problems in which most nonzeros are near the diagonal,
1885    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1886    or you will get TERRIBLE performance; see the users' manual chapter on
1887    matrices.
1888 
1889    Level: intermediate
1890 
1891 .keywords: matrix, block, aij, compressed row, sparse, parallel
1892 
1893 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1894 @*/
1895 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1896 {
1897   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1898 
1899   PetscFunctionBegin;
1900   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1901   if (f) {
1902     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1903   }
1904   PetscFunctionReturn(0);
1905 }
1906 
1907 #undef __FUNCT__
1908 #define __FUNCT__ "MatCreateMPISBAIJ"
1909 /*@C
1910    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1911    (block compressed row).  For good matrix assembly performance
1912    the user should preallocate the matrix storage by setting the parameters
1913    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1914    performance can be increased by more than a factor of 50.
1915 
1916    Collective on MPI_Comm
1917 
1918    Input Parameters:
1919 +  comm - MPI communicator
1920 .  bs   - size of blockk
1921 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1922            This value should be the same as the local size used in creating the
1923            y vector for the matrix-vector product y = Ax.
1924 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1925            This value should be the same as the local size used in creating the
1926            x vector for the matrix-vector product y = Ax.
1927 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1928 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1929 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1930            submatrix  (same for all local rows)
1931 .  d_nnz - array containing the number of block nonzeros in the various block rows
1932            in the upper triangular portion of the in diagonal portion of the local
1933            (possibly different for each block block row) or PETSC_NULL.
1934            You must leave room for the diagonal entry even if it is zero.
1935 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1936            submatrix (same for all local rows).
1937 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1938            off-diagonal portion of the local submatrix (possibly different for
1939            each block row) or PETSC_NULL.
1940 
1941    Output Parameter:
1942 .  A - the matrix
1943 
1944    Options Database Keys:
1945 .   -mat_no_unroll - uses code that does not unroll the loops in the
1946                      block calculations (much slower)
1947 .   -mat_block_size - size of the blocks to use
1948 .   -mat_mpi - use the parallel matrix data structures even on one processor
1949                (defaults to using SeqBAIJ format on one processor)
1950 
1951    Notes:
1952    The user MUST specify either the local or global matrix dimensions
1953    (possibly both).
1954 
1955    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1956    than it must be used on all processors that share the object for that argument.
1957 
1958    Storage Information:
1959    For a square global matrix we define each processor's diagonal portion
1960    to be its local rows and the corresponding columns (a square submatrix);
1961    each processor's off-diagonal portion encompasses the remainder of the
1962    local matrix (a rectangular submatrix).
1963 
1964    The user can specify preallocated storage for the diagonal part of
1965    the local submatrix with either d_nz or d_nnz (not both).  Set
1966    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1967    memory allocation.  Likewise, specify preallocated storage for the
1968    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1969 
1970    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1971    the figure below we depict these three local rows and all columns (0-11).
1972 
1973 .vb
1974            0 1 2 3 4 5 6 7 8 9 10 11
1975           -------------------
1976    row 3  |  o o o d d d o o o o o o
1977    row 4  |  o o o d d d o o o o o o
1978    row 5  |  o o o d d d o o o o o o
1979           -------------------
1980 .ve
1981 
1982    Thus, any entries in the d locations are stored in the d (diagonal)
1983    submatrix, and any entries in the o locations are stored in the
1984    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1985    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1986 
1987    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1988    plus the diagonal part of the d matrix,
1989    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1990    In general, for PDE problems in which most nonzeros are near the diagonal,
1991    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1992    or you will get TERRIBLE performance; see the users' manual chapter on
1993    matrices.
1994 
1995    Level: intermediate
1996 
1997 .keywords: matrix, block, aij, compressed row, sparse, parallel
1998 
1999 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2000 @*/
2001 
2002 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2003 {
2004   int ierr,size;
2005 
2006   PetscFunctionBegin;
2007   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2008   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2009   if (size > 1) {
2010     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2011     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2012   } else {
2013     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2014     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2015   }
2016   PetscFunctionReturn(0);
2017 }
2018 
2019 
2020 #undef __FUNCT__
2021 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2022 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2023 {
2024   Mat          mat;
2025   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2026   int          ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2027   PetscScalar  *array;
2028 
2029   PetscFunctionBegin;
2030   *newmat       = 0;
2031   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2032   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2033   /* ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);  -- cause error? */
2034   mat->factor       = matin->factor;
2035   mat->preallocated = PETSC_TRUE;
2036   mat->assembled    = PETSC_TRUE;
2037   a = (Mat_MPISBAIJ*)mat->data;
2038   a->bs  = oldmat->bs;
2039   a->bs2 = oldmat->bs2;
2040   a->mbs = oldmat->mbs;
2041   a->nbs = oldmat->nbs;
2042   a->Mbs = oldmat->Mbs;
2043   a->Nbs = oldmat->Nbs;
2044 
2045   a->rstart       = oldmat->rstart;
2046   a->rend         = oldmat->rend;
2047   a->cstart       = oldmat->cstart;
2048   a->cend         = oldmat->cend;
2049   a->size         = oldmat->size;
2050   a->rank         = oldmat->rank;
2051   a->donotstash   = oldmat->donotstash;
2052   a->roworiented  = oldmat->roworiented;
2053   a->rowindices   = 0;
2054   a->rowvalues    = 0;
2055   a->getrowactive = PETSC_FALSE;
2056   a->barray       = 0;
2057   a->rstart_bs    = oldmat->rstart_bs;
2058   a->rend_bs      = oldmat->rend_bs;
2059   a->cstart_bs    = oldmat->cstart_bs;
2060   a->cend_bs      = oldmat->cend_bs;
2061 
2062   /* hash table stuff */
2063   a->ht           = 0;
2064   a->hd           = 0;
2065   a->ht_size      = 0;
2066   a->ht_flag      = oldmat->ht_flag;
2067   a->ht_fact      = oldmat->ht_fact;
2068   a->ht_total_ct  = 0;
2069   a->ht_insert_ct = 0;
2070 
2071   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2072   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2073   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2074   if (oldmat->colmap) {
2075 #if defined (PETSC_USE_CTABLE)
2076     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2077 #else
2078     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2079     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2080     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2081 #endif
2082   } else a->colmap = 0;
2083 
2084   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2085     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2086     PetscLogObjectMemory(mat,len*sizeof(int));
2087     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2088   } else a->garray = 0;
2089 
2090   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2091   PetscLogObjectParent(mat,a->lvec);
2092   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2093   PetscLogObjectParent(mat,a->Mvctx);
2094 
2095   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2096   PetscLogObjectParent(mat,a->slvec0);
2097   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2098   PetscLogObjectParent(mat,a->slvec1);
2099 
2100   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2101   ierr = VecGetArrayFast(a->slvec1,&array);CHKERRQ(ierr);
2102   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2103   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2104   ierr = VecRestoreArrayFast(a->slvec1,&array);CHKERRQ(ierr);
2105   ierr = VecGetArrayFast(a->slvec0,&array);CHKERRQ(ierr);
2106   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2107   ierr = VecRestoreArrayFast(a->slvec0,&array);CHKERRQ(ierr);
2108   PetscLogObjectParent(mat,a->slvec0);
2109   PetscLogObjectParent(mat,a->slvec1);
2110   PetscLogObjectParent(mat,a->slvec0b);
2111   PetscLogObjectParent(mat,a->slvec1a);
2112   PetscLogObjectParent(mat,a->slvec1b);
2113 
2114   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2115   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2116   a->sMvctx = oldmat->sMvctx;
2117   PetscLogObjectParent(mat,a->sMvctx);
2118 
2119   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2120   PetscLogObjectParent(mat,a->A);
2121   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2122   PetscLogObjectParent(mat,a->B);
2123   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2124   *newmat = mat;
2125   PetscFunctionReturn(0);
2126 }
2127 
2128 #include "petscsys.h"
2129 
2130 #undef __FUNCT__
2131 #define __FUNCT__ "MatLoad_MPISBAIJ"
2132 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2133 {
2134   Mat          A;
2135   int          i,nz,ierr,j,rstart,rend,fd;
2136   PetscScalar  *vals,*buf;
2137   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2138   MPI_Status   status;
2139   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2140   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2141   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2142   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2143   int          dcount,kmax,k,nzcount,tmp;
2144 
2145   PetscFunctionBegin;
2146   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2147 
2148   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2149   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2150   if (!rank) {
2151     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2152     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2153     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2154     if (header[3] < 0) {
2155       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2156     }
2157   }
2158 
2159   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2160   M = header[1]; N = header[2];
2161 
2162   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2163 
2164   /*
2165      This code adds extra rows to make sure the number of rows is
2166      divisible by the blocksize
2167   */
2168   Mbs        = M/bs;
2169   extra_rows = bs - M + bs*(Mbs);
2170   if (extra_rows == bs) extra_rows = 0;
2171   else                  Mbs++;
2172   if (extra_rows &&!rank) {
2173     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2174   }
2175 
2176   /* determine ownership of all rows */
2177   mbs        = Mbs/size + ((Mbs % size) > rank);
2178   m          = mbs*bs;
2179   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2180   browners   = rowners + size + 1;
2181   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2182   rowners[0] = 0;
2183   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2184   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2185   rstart = rowners[rank];
2186   rend   = rowners[rank+1];
2187 
2188   /* distribute row lengths to all processors */
2189   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2190   if (!rank) {
2191     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2192     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2193     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2194     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2195     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2196     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2197     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2198   } else {
2199     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2200   }
2201 
2202   if (!rank) {   /* procs[0] */
2203     /* calculate the number of nonzeros on each processor */
2204     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2205     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2206     for (i=0; i<size; i++) {
2207       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2208         procsnz[i] += rowlengths[j];
2209       }
2210     }
2211     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2212 
2213     /* determine max buffer needed and allocate it */
2214     maxnz = 0;
2215     for (i=0; i<size; i++) {
2216       maxnz = PetscMax(maxnz,procsnz[i]);
2217     }
2218     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2219 
2220     /* read in my part of the matrix column indices  */
2221     nz     = procsnz[0];
2222     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2223     mycols = ibuf;
2224     if (size == 1)  nz -= extra_rows;
2225     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2226     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2227 
2228     /* read in every ones (except the last) and ship off */
2229     for (i=1; i<size-1; i++) {
2230       nz   = procsnz[i];
2231       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2232       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2233     }
2234     /* read in the stuff for the last proc */
2235     if (size != 1) {
2236       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2237       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2238       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2239       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2240     }
2241     ierr = PetscFree(cols);CHKERRQ(ierr);
2242   } else {  /* procs[i], i>0 */
2243     /* determine buffer space needed for message */
2244     nz = 0;
2245     for (i=0; i<m; i++) {
2246       nz += locrowlens[i];
2247     }
2248     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2249     mycols = ibuf;
2250     /* receive message of column indices*/
2251     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2252     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2253     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2254   }
2255 
2256   /* loop over local rows, determining number of off diagonal entries */
2257   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2258   odlens   = dlens + (rend-rstart);
2259   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2260   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2261   masked1  = mask    + Mbs;
2262   masked2  = masked1 + Mbs;
2263   rowcount = 0; nzcount = 0;
2264   for (i=0; i<mbs; i++) {
2265     dcount  = 0;
2266     odcount = 0;
2267     for (j=0; j<bs; j++) {
2268       kmax = locrowlens[rowcount];
2269       for (k=0; k<kmax; k++) {
2270         tmp = mycols[nzcount++]/bs; /* block col. index */
2271         if (!mask[tmp]) {
2272           mask[tmp] = 1;
2273           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2274           else masked1[dcount++] = tmp; /* entry in diag portion */
2275         }
2276       }
2277       rowcount++;
2278     }
2279 
2280     dlens[i]  = dcount;  /* d_nzz[i] */
2281     odlens[i] = odcount; /* o_nzz[i] */
2282 
2283     /* zero out the mask elements we set */
2284     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2285     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2286   }
2287 
2288   /* create our matrix */
2289   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2290   ierr = MatSetType(A,type);CHKERRQ(ierr);
2291   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2292   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2293 
2294   if (!rank) {
2295     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2296     /* read in my part of the matrix numerical values  */
2297     nz = procsnz[0];
2298     vals = buf;
2299     mycols = ibuf;
2300     if (size == 1)  nz -= extra_rows;
2301     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2302     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2303 
2304     /* insert into matrix */
2305     jj      = rstart*bs;
2306     for (i=0; i<m; i++) {
2307       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2308       mycols += locrowlens[i];
2309       vals   += locrowlens[i];
2310       jj++;
2311     }
2312 
2313     /* read in other processors (except the last one) and ship out */
2314     for (i=1; i<size-1; i++) {
2315       nz   = procsnz[i];
2316       vals = buf;
2317       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2318       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2319     }
2320     /* the last proc */
2321     if (size != 1){
2322       nz   = procsnz[i] - extra_rows;
2323       vals = buf;
2324       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2325       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2326       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2327     }
2328     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2329 
2330   } else {
2331     /* receive numeric values */
2332     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2333 
2334     /* receive message of values*/
2335     vals   = buf;
2336     mycols = ibuf;
2337     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2338     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2339     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2340 
2341     /* insert into matrix */
2342     jj      = rstart*bs;
2343     for (i=0; i<m; i++) {
2344       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2345       mycols += locrowlens[i];
2346       vals   += locrowlens[i];
2347       jj++;
2348     }
2349   }
2350 
2351   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2352   ierr = PetscFree(buf);CHKERRQ(ierr);
2353   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2354   ierr = PetscFree(rowners);CHKERRQ(ierr);
2355   ierr = PetscFree(dlens);CHKERRQ(ierr);
2356   ierr = PetscFree(mask);CHKERRQ(ierr);
2357   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2358   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2359   *newmat = A;
2360   PetscFunctionReturn(0);
2361 }
2362 
2363 #undef __FUNCT__
2364 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2365 /*@
2366    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2367 
2368    Input Parameters:
2369 .  mat  - the matrix
2370 .  fact - factor
2371 
2372    Collective on Mat
2373 
2374    Level: advanced
2375 
2376   Notes:
2377    This can also be set by the command line option: -mat_use_hash_table fact
2378 
2379 .keywords: matrix, hashtable, factor, HT
2380 
2381 .seealso: MatSetOption()
2382 @*/
2383 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2384 {
2385   PetscFunctionBegin;
2386   SETERRQ(1,"Function not yet written for SBAIJ format");
2387   /* PetscFunctionReturn(0); */
2388 }
2389 
2390 #undef __FUNCT__
2391 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2392 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2393 {
2394   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2395   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2396   PetscReal    atmp;
2397   PetscReal    *work,*svalues,*rvalues;
2398   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2399   int          rank,size,*rowners_bs,dest,count,source;
2400   PetscScalar  *va;
2401   MatScalar    *ba;
2402   MPI_Status   stat;
2403 
2404   PetscFunctionBegin;
2405   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2406   ierr = VecGetArrayFast(v,&va);CHKERRQ(ierr);
2407 
2408   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2409   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2410 
2411   bs   = a->bs;
2412   mbs  = a->mbs;
2413   Mbs  = a->Mbs;
2414   ba   = b->a;
2415   bi   = b->i;
2416   bj   = b->j;
2417   /*
2418   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2419   PetscSynchronizedFlush(A->comm);
2420   */
2421 
2422   /* find ownerships */
2423   rowners_bs = a->rowners_bs;
2424   /*
2425   if (!rank){
2426     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2427   }
2428   */
2429 
2430   /* each proc creates an array to be distributed */
2431   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2432   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2433 
2434   /* row_max for B */
2435   if (rank != size-1){
2436     for (i=0; i<mbs; i++) {
2437       ncols = bi[1] - bi[0]; bi++;
2438       brow  = bs*i;
2439       for (j=0; j<ncols; j++){
2440         bcol = bs*(*bj);
2441         for (kcol=0; kcol<bs; kcol++){
2442           col = bcol + kcol;                 /* local col index */
2443           col += rowners_bs[rank+1];      /* global col index */
2444           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2445           for (krow=0; krow<bs; krow++){
2446             atmp = PetscAbsScalar(*ba); ba++;
2447             row = brow + krow;    /* local row index */
2448             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2449             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2450             if (work[col] < atmp) work[col] = atmp;
2451           }
2452         }
2453         bj++;
2454       }
2455     }
2456     /*
2457       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2458       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2459       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2460       */
2461 
2462     /* send values to its owners */
2463     for (dest=rank+1; dest<size; dest++){
2464       svalues = work + rowners_bs[dest];
2465       count   = rowners_bs[dest+1]-rowners_bs[dest];
2466       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2467       /*
2468       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2469       PetscSynchronizedFlush(A->comm);
2470       */
2471     }
2472   }
2473 
2474   /* receive values */
2475   if (rank){
2476     rvalues = work;
2477     count   = rowners_bs[rank+1]-rowners_bs[rank];
2478     for (source=0; source<rank; source++){
2479       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2480       /* process values */
2481       for (i=0; i<count; i++){
2482         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2483       }
2484       /*
2485       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2486       PetscSynchronizedFlush(A->comm);
2487       */
2488     }
2489   }
2490 
2491   ierr = VecRestoreArrayFast(v,&va);CHKERRQ(ierr);
2492   ierr = PetscFree(work);CHKERRQ(ierr);
2493   PetscFunctionReturn(0);
2494 }
2495 
2496 #undef __FUNCT__
2497 #define __FUNCT__ "MatRelax_MPISBAIJ"
2498 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2499 {
2500   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2501   int            ierr,mbs=mat->mbs,bs=mat->bs;
2502   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2503   Vec            bb1;
2504 
2505   PetscFunctionBegin;
2506   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2507   if (bs > 1)
2508     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2509 
2510   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2511     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2512       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2513       its--;
2514     }
2515 
2516     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2517     while (its--){
2518 
2519       /* lower triangular part: slvec0b = - B^T*xx */
2520       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2521 
2522       /* copy xx into slvec0a */
2523       ierr = VecGetArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr);
2524       ierr = VecGetArrayFast(xx,&x);CHKERRQ(ierr);
2525       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2526       ierr = VecRestoreArrayFast(mat->slvec0,&ptr);CHKERRQ(ierr);
2527 
2528       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2529 
2530       /* copy bb into slvec1a */
2531       ierr = VecGetArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr);
2532       ierr = VecGetArrayFast(bb,&b);CHKERRQ(ierr);
2533       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2534       ierr = VecRestoreArrayFast(mat->slvec1,&ptr);CHKERRQ(ierr);
2535 
2536       /* set slvec1b = 0 */
2537       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2538 
2539       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2540       ierr = VecRestoreArrayFast(xx,&x);CHKERRQ(ierr);
2541       ierr = VecRestoreArrayFast(bb,&b);CHKERRQ(ierr);
2542       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2543 
2544       /* upper triangular part: bb1 = bb1 - B*x */
2545       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2546 
2547       /* local diagonal sweep */
2548       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2549     }
2550     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2551   } else {
2552     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2553   }
2554   PetscFunctionReturn(0);
2555 }
2556 
2557 #undef __FUNCT__
2558 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2559 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2560 {
2561   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2562   int            ierr;
2563   PetscScalar    mone=-1.0;
2564   Vec            lvec1,bb1;
2565 
2566   PetscFunctionBegin;
2567   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2568   if (mat->bs > 1)
2569     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2570 
2571   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2572     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2573       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2574       its--;
2575     }
2576 
2577     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2578     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2579     while (its--){
2580       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2581 
2582       /* lower diagonal part: bb1 = bb - B^T*xx */
2583       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2584       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2585 
2586       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2587       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2588       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2589 
2590       /* upper diagonal part: bb1 = bb1 - B*x */
2591       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2592       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2593 
2594       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2595 
2596       /* diagonal sweep */
2597       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2598     }
2599     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2600     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2601   } else {
2602     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2603   }
2604   PetscFunctionReturn(0);
2605 }
2606 
2607