xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 8115998fe213e648cf61076ec2e7e263c950dbeb)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *);
14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 extern int MatUseSpooles_MPISBAIJ(Mat);
24 
25 /*  UGLY, ugly, ugly
26    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
27    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
28    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
29    converts the entries into single precision and then calls ..._MatScalar() to put them
30    into the single precision data structures.
31 */
32 #if defined(PETSC_USE_MAT_SINGLE)
33 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
36 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
37 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
38 #else
39 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
40 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
41 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
42 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
43 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
44 #endif
45 
46 EXTERN_C_BEGIN
47 #undef __FUNCT__
48 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
49 int MatStoreValues_MPISBAIJ(Mat mat)
50 {
51   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
52   int          ierr;
53 
54   PetscFunctionBegin;
55   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
56   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
57   PetscFunctionReturn(0);
58 }
59 EXTERN_C_END
60 
61 EXTERN_C_BEGIN
62 #undef __FUNCT__
63 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
64 int MatRetrieveValues_MPISBAIJ(Mat mat)
65 {
66   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
67   int          ierr;
68 
69   PetscFunctionBegin;
70   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
71   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
72   PetscFunctionReturn(0);
73 }
74 EXTERN_C_END
75 
76 /*
77      Local utility routine that creates a mapping from the global column
78    number to the local number in the off-diagonal part of the local
79    storage of the matrix.  This is done in a non scable way since the
80    length of colmap equals the global matrix length.
81 */
82 #undef __FUNCT__
83 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
84 static int CreateColmap_MPISBAIJ_Private(Mat mat)
85 {
86   PetscFunctionBegin;
87   SETERRQ(1,"Function not yet written for SBAIJ format");
88   /* PetscFunctionReturn(0); */
89 }
90 
91 #define CHUNKSIZE  10
92 
93 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
94 { \
95  \
96     brow = row/bs;  \
97     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
98     rmax = aimax[brow]; nrow = ailen[brow]; \
99       bcol = col/bs; \
100       ridx = row % bs; cidx = col % bs; \
101       low = 0; high = nrow; \
102       while (high-low > 3) { \
103         t = (low+high)/2; \
104         if (rp[t] > bcol) high = t; \
105         else              low  = t; \
106       } \
107       for (_i=low; _i<high; _i++) { \
108         if (rp[_i] > bcol) break; \
109         if (rp[_i] == bcol) { \
110           bap  = ap +  bs2*_i + bs*cidx + ridx; \
111           if (addv == ADD_VALUES) *bap += value;  \
112           else                    *bap  = value;  \
113           goto a_noinsert; \
114         } \
115       } \
116       if (a->nonew == 1) goto a_noinsert; \
117       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
118       if (nrow >= rmax) { \
119         /* there is no extra room in row, therefore enlarge */ \
120         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
121         MatScalar *new_a; \
122  \
123         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
124  \
125         /* malloc new storage space */ \
126         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
127         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
128         new_j = (int*)(new_a + bs2*new_nz); \
129         new_i = new_j + new_nz; \
130  \
131         /* copy over old data into new slots */ \
132         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
133         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
134         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
135         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
136         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
137         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
138         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
139         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
140                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
141         /* free up old matrix storage */ \
142         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
143         if (!a->singlemalloc) { \
144           ierr = PetscFree(a->i);CHKERRQ(ierr); \
145           ierr = PetscFree(a->j);CHKERRQ(ierr);\
146         } \
147         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
148         a->singlemalloc = PETSC_TRUE; \
149  \
150         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
151         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
152         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
153         a->s_maxnz += bs2*CHUNKSIZE; \
154         a->reallocs++; \
155         a->s_nz++; \
156       } \
157       N = nrow++ - 1;  \
158       /* shift up all the later entries in this row */ \
159       for (ii=N; ii>=_i; ii--) { \
160         rp[ii+1] = rp[ii]; \
161         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
162       } \
163       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
164       rp[_i]                      = bcol;  \
165       ap[bs2*_i + bs*cidx + ridx] = value;  \
166       a_noinsert:; \
167     ailen[brow] = nrow; \
168 }
169 #ifndef MatSetValues_SeqBAIJ_B_Private
170 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
171 { \
172     brow = row/bs;  \
173     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
174     rmax = bimax[brow]; nrow = bilen[brow]; \
175       bcol = col/bs; \
176       ridx = row % bs; cidx = col % bs; \
177       low = 0; high = nrow; \
178       while (high-low > 3) { \
179         t = (low+high)/2; \
180         if (rp[t] > bcol) high = t; \
181         else              low  = t; \
182       } \
183       for (_i=low; _i<high; _i++) { \
184         if (rp[_i] > bcol) break; \
185         if (rp[_i] == bcol) { \
186           bap  = ap +  bs2*_i + bs*cidx + ridx; \
187           if (addv == ADD_VALUES) *bap += value;  \
188           else                    *bap  = value;  \
189           goto b_noinsert; \
190         } \
191       } \
192       if (b->nonew == 1) goto b_noinsert; \
193       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
194       if (nrow >= rmax) { \
195         /* there is no extra room in row, therefore enlarge */ \
196         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
197         MatScalar *new_a; \
198  \
199         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
200  \
201         /* malloc new storage space */ \
202         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
203         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
204         new_j = (int*)(new_a + bs2*new_nz); \
205         new_i = new_j + new_nz; \
206  \
207         /* copy over old data into new slots */ \
208         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
209         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
210         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
211         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
212         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
213         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
214         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
215         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
216                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
217         /* free up old matrix storage */ \
218         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
219         if (!b->singlemalloc) { \
220           ierr = PetscFree(b->i);CHKERRQ(ierr); \
221           ierr = PetscFree(b->j);CHKERRQ(ierr); \
222         } \
223         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
224         b->singlemalloc = PETSC_TRUE; \
225  \
226         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
227         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
228         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
229         b->maxnz += bs2*CHUNKSIZE; \
230         b->reallocs++; \
231         b->nz++; \
232       } \
233       N = nrow++ - 1;  \
234       /* shift up all the later entries in this row */ \
235       for (ii=N; ii>=_i; ii--) { \
236         rp[ii+1] = rp[ii]; \
237         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
238       } \
239       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
240       rp[_i]                      = bcol;  \
241       ap[bs2*_i + bs*cidx + ridx] = value;  \
242       b_noinsert:; \
243     bilen[brow] = nrow; \
244 }
245 #endif
246 
247 #if defined(PETSC_USE_MAT_SINGLE)
248 #undef __FUNCT__
249 #define __FUNCT__ "MatSetValues_MPISBAIJ"
250 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
251 {
252   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
253   int          ierr,i,N = m*n;
254   MatScalar    *vsingle;
255 
256   PetscFunctionBegin;
257   if (N > b->setvalueslen) {
258     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
259     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
260     b->setvalueslen  = N;
261   }
262   vsingle = b->setvaluescopy;
263 
264   for (i=0; i<N; i++) {
265     vsingle[i] = v[i];
266   }
267   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
268   PetscFunctionReturn(0);
269 }
270 
271 #undef __FUNCT__
272 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
273 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
274 {
275   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
276   int         ierr,i,N = m*n*b->bs2;
277   MatScalar   *vsingle;
278 
279   PetscFunctionBegin;
280   if (N > b->setvalueslen) {
281     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
282     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
283     b->setvalueslen  = N;
284   }
285   vsingle = b->setvaluescopy;
286   for (i=0; i<N; i++) {
287     vsingle[i] = v[i];
288   }
289   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
290   PetscFunctionReturn(0);
291 }
292 
293 #undef __FUNCT__
294 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
295 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
296 {
297   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
298   int         ierr,i,N = m*n;
299   MatScalar   *vsingle;
300 
301   PetscFunctionBegin;
302   SETERRQ(1,"Function not yet written for SBAIJ format");
303   /* PetscFunctionReturn(0); */
304 }
305 
306 #undef __FUNCT__
307 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
308 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
309 {
310   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
311   int         ierr,i,N = m*n*b->bs2;
312   MatScalar   *vsingle;
313 
314   PetscFunctionBegin;
315   SETERRQ(1,"Function not yet written for SBAIJ format");
316   /* PetscFunctionReturn(0); */
317 }
318 #endif
319 
320 /* Only add/insert a(i,j) with i<=j (blocks).
321    Any a(i,j) with i>j input by user is ingored.
322 */
323 #undef __FUNCT__
324 #define __FUNCT__ "MatSetValues_MPIBAIJ"
325 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
326 {
327   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
328   MatScalar    value;
329   PetscTruth   roworiented = baij->roworiented;
330   int          ierr,i,j,row,col;
331   int          rstart_orig=baij->rstart_bs;
332   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
333   int          cend_orig=baij->cend_bs,bs=baij->bs;
334 
335   /* Some Variables required in the macro */
336   Mat          A = baij->A;
337   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
338   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
339   MatScalar    *aa=a->a;
340 
341   Mat          B = baij->B;
342   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
343   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
344   MatScalar    *ba=b->a;
345 
346   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
347   int          low,high,t,ridx,cidx,bs2=a->bs2;
348   MatScalar    *ap,*bap;
349 
350   /* for stash */
351   int          n_loc, *in_loc=0;
352   MatScalar    *v_loc=0;
353 
354   PetscFunctionBegin;
355 
356   if(!baij->donotstash){
357     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
358     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
359   }
360 
361   for (i=0; i<m; i++) {
362     if (im[i] < 0) continue;
363 #if defined(PETSC_USE_BOPT_g)
364     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
365 #endif
366     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
367       row = im[i] - rstart_orig;              /* local row index */
368       for (j=0; j<n; j++) {
369         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
370         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
371           col = in[j] - cstart_orig;          /* local col index */
372           brow = row/bs; bcol = col/bs;
373           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
374           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
375           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
376           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
377         } else if (in[j] < 0) continue;
378 #if defined(PETSC_USE_BOPT_g)
379         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
380 #endif
381         else {  /* off-diag entry (B) */
382           if (mat->was_assembled) {
383             if (!baij->colmap) {
384               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
385             }
386 #if defined (PETSC_USE_CTABLE)
387             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
388             col  = col - 1;
389 #else
390             col = baij->colmap[in[j]/bs] - 1;
391 #endif
392             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
393               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
394               col =  in[j];
395               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
396               B = baij->B;
397               b = (Mat_SeqBAIJ*)(B)->data;
398               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
399               ba=b->a;
400             } else col += in[j]%bs;
401           } else col = in[j];
402           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
403           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
404           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
405         }
406       }
407     } else {  /* off processor entry */
408       if (!baij->donotstash) {
409         n_loc = 0;
410         for (j=0; j<n; j++){
411           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
412           in_loc[n_loc] = in[j];
413           if (roworiented) {
414             v_loc[n_loc] = v[i*n+j];
415           } else {
416             v_loc[n_loc] = v[j*m+i];
417           }
418           n_loc++;
419         }
420         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
421       }
422     }
423   }
424 
425   if(!baij->donotstash){
426     ierr = PetscFree(in_loc);CHKERRQ(ierr);
427     ierr = PetscFree(v_loc);CHKERRQ(ierr);
428   }
429   PetscFunctionReturn(0);
430 }
431 
432 #undef __FUNCT__
433 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
434 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
435 {
436   PetscFunctionBegin;
437   SETERRQ(1,"Function not yet written for SBAIJ format");
438   /* PetscFunctionReturn(0); */
439 }
440 
441 #define HASH_KEY 0.6180339887
442 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
443 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
444 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
445 #undef __FUNCT__
446 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
447 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
448 {
449   PetscFunctionBegin;
450   SETERRQ(1,"Function not yet written for SBAIJ format");
451   /* PetscFunctionReturn(0); */
452 }
453 
454 #undef __FUNCT__
455 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
456 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
457 {
458   PetscFunctionBegin;
459   SETERRQ(1,"Function not yet written for SBAIJ format");
460   /* PetscFunctionReturn(0); */
461 }
462 
463 #undef __FUNCT__
464 #define __FUNCT__ "MatGetValues_MPISBAIJ"
465 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
466 {
467   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
468   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
469   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
470 
471   PetscFunctionBegin;
472   for (i=0; i<m; i++) {
473     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
474     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
475     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
476       row = idxm[i] - bsrstart;
477       for (j=0; j<n; j++) {
478         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
479         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
480         if (idxn[j] >= bscstart && idxn[j] < bscend){
481           col = idxn[j] - bscstart;
482           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
483         } else {
484           if (!baij->colmap) {
485             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
486           }
487 #if defined (PETSC_USE_CTABLE)
488           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
489           data --;
490 #else
491           data = baij->colmap[idxn[j]/bs]-1;
492 #endif
493           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
494           else {
495             col  = data + idxn[j]%bs;
496             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
497           }
498         }
499       }
500     } else {
501       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
502     }
503   }
504  PetscFunctionReturn(0);
505 }
506 
507 #undef __FUNCT__
508 #define __FUNCT__ "MatNorm_MPISBAIJ"
509 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
510 {
511   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
512   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
513   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
514   int        ierr;
515   PetscReal  sum[2],*lnorm2;
516 
517   PetscFunctionBegin;
518   if (baij->size == 1) {
519     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
520   } else {
521     if (type == NORM_FROBENIUS) {
522       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
523       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
524       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
525       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
526       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
527       /*
528       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
529       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
530       */
531       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       /*
533       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
534       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
535 
536       *norm = sqrt(sum[0] + 2*sum[1]);
537       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
538     } else {
539       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
540     }
541   }
542   PetscFunctionReturn(0);
543 }
544 
545 /*
546   Creates the hash table, and sets the table
547   This table is created only once.
548   If new entried need to be added to the matrix
549   then the hash table has to be destroyed and
550   recreated.
551 */
552 #undef __FUNCT__
553 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
554 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
555 {
556   PetscFunctionBegin;
557   SETERRQ(1,"Function not yet written for SBAIJ format");
558   /* PetscFunctionReturn(0); */
559 }
560 
561 #undef __FUNCT__
562 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
563 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
564 {
565   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
566   int         ierr,nstash,reallocs;
567   InsertMode  addv;
568 
569   PetscFunctionBegin;
570   if (baij->donotstash) {
571     PetscFunctionReturn(0);
572   }
573 
574   /* make sure all processors are either in INSERTMODE or ADDMODE */
575   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
576   if (addv == (ADD_VALUES|INSERT_VALUES)) {
577     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
578   }
579   mat->insertmode = addv; /* in case this processor had no cache */
580 
581   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
582   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
583   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
584   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
585   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
586   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
587   PetscFunctionReturn(0);
588 }
589 
590 #undef __FUNCT__
591 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
592 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
593 {
594   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
595   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
596   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
597   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
598   int         *row,*col,other_disassembled;
599   PetscTruth  r1,r2,r3;
600   MatScalar   *val;
601   InsertMode  addv = mat->insertmode;
602 #if defined(PETSC_HAVE_SPOOLES)
603   PetscTruth  flag;
604 #endif
605 
606   PetscFunctionBegin;
607 
608   if (!baij->donotstash) {
609     while (1) {
610       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
611       /*
612       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
613       PetscSynchronizedFlush(PETSC_COMM_WORLD);
614       */
615       if (!flg) break;
616 
617       for (i=0; i<n;) {
618         /* Now identify the consecutive vals belonging to the same row */
619         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
620         if (j < n) ncols = j-i;
621         else       ncols = n-i;
622         /* Now assemble all these values with a single function call */
623         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
624         i = j;
625       }
626     }
627     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
628     /* Now process the block-stash. Since the values are stashed column-oriented,
629        set the roworiented flag to column oriented, and after MatSetValues()
630        restore the original flags */
631     r1 = baij->roworiented;
632     r2 = a->roworiented;
633     r3 = b->roworiented;
634     baij->roworiented = PETSC_FALSE;
635     a->roworiented    = PETSC_FALSE;
636     b->roworiented    = PETSC_FALSE;
637     while (1) {
638       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
639       if (!flg) break;
640 
641       for (i=0; i<n;) {
642         /* Now identify the consecutive vals belonging to the same row */
643         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
644         if (j < n) ncols = j-i;
645         else       ncols = n-i;
646         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
647         i = j;
648       }
649     }
650     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
651     baij->roworiented = r1;
652     a->roworiented    = r2;
653     b->roworiented    = r3;
654   }
655 
656   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
657   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
658 
659   /* determine if any processor has disassembled, if so we must
660      also disassemble ourselfs, in order that we may reassemble. */
661   /*
662      if nonzero structure of submatrix B cannot change then we know that
663      no processor disassembled thus we can skip this stuff
664   */
665   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
666     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
667     if (mat->was_assembled && !other_disassembled) {
668       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
669     }
670   }
671 
672   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
673     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
674   }
675   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
676   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
677 
678 #if defined(PETSC_USE_BOPT_g)
679   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
680     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
681     baij->ht_total_ct  = 0;
682     baij->ht_insert_ct = 0;
683   }
684 #endif
685   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
686     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
687     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
688     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
689   }
690 
691   if (baij->rowvalues) {
692     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
693     baij->rowvalues = 0;
694   }
695 
696 #if defined(PETSC_HAVE_SPOOLES)
697   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_sbaij_spooles",&flag);CHKERRQ(ierr);
698   if (flag) { ierr = MatUseSpooles_MPISBAIJ(mat);CHKERRQ(ierr); }
699 #endif
700   PetscFunctionReturn(0);
701 }
702 
703 #undef __FUNCT__
704 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
705 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
706 {
707   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
708   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
709   PetscTruth        isascii,isdraw;
710   PetscViewer       sviewer;
711   PetscViewerFormat format;
712 
713   PetscFunctionBegin;
714   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
715   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
716   if (isascii) {
717     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
718     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
719       MatInfo info;
720       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
721       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
722       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
723               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
724               baij->bs,(int)info.memory);CHKERRQ(ierr);
725       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
726       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
727       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
728       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
729       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
730       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
731       PetscFunctionReturn(0);
732     } else if (format == PETSC_VIEWER_ASCII_INFO) {
733       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
734       PetscFunctionReturn(0);
735     }
736   }
737 
738   if (isdraw) {
739     PetscDraw       draw;
740     PetscTruth isnull;
741     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
742     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
743   }
744 
745   if (size == 1) {
746     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
747     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
748   } else {
749     /* assemble the entire matrix onto first processor. */
750     Mat         A;
751     Mat_SeqSBAIJ *Aloc;
752     Mat_SeqBAIJ *Bloc;
753     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
754     MatScalar   *a;
755 
756     if (!rank) {
757       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
758     } else {
759       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
760     }
761     PetscLogObjectParent(mat,A);
762 
763     /* copy over the A part */
764     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
765     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
766     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
767 
768     for (i=0; i<mbs; i++) {
769       rvals[0] = bs*(baij->rstart + i);
770       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
771       for (j=ai[i]; j<ai[i+1]; j++) {
772         col = (baij->cstart+aj[j])*bs;
773         for (k=0; k<bs; k++) {
774           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
775           col++; a += bs;
776         }
777       }
778     }
779     /* copy over the B part */
780     Bloc = (Mat_SeqBAIJ*)baij->B->data;
781     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
782     for (i=0; i<mbs; i++) {
783       rvals[0] = bs*(baij->rstart + i);
784       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
785       for (j=ai[i]; j<ai[i+1]; j++) {
786         col = baij->garray[aj[j]]*bs;
787         for (k=0; k<bs; k++) {
788           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
789           col++; a += bs;
790         }
791       }
792     }
793     ierr = PetscFree(rvals);CHKERRQ(ierr);
794     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
795     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
796     /*
797        Everyone has to call to draw the matrix since the graphics waits are
798        synchronized across all processors that share the PetscDraw object
799     */
800     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
801     if (!rank) {
802       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
803       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
804     }
805     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
806     ierr = MatDestroy(A);CHKERRQ(ierr);
807   }
808   PetscFunctionReturn(0);
809 }
810 
811 #undef __FUNCT__
812 #define __FUNCT__ "MatView_MPISBAIJ"
813 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
814 {
815   int        ierr;
816   PetscTruth isascii,isdraw,issocket,isbinary;
817 
818   PetscFunctionBegin;
819   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
820   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
821   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
822   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
823   if (isascii || isdraw || issocket || isbinary) {
824     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
825   } else {
826     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
827   }
828   PetscFunctionReturn(0);
829 }
830 
831 #undef __FUNCT__
832 #define __FUNCT__ "MatDestroy_MPISBAIJ"
833 int MatDestroy_MPISBAIJ(Mat mat)
834 {
835   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
836   int         ierr;
837 
838   PetscFunctionBegin;
839 #if defined(PETSC_USE_LOG)
840   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
841 #endif
842   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
843   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
844   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
845   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
846   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
847 #if defined (PETSC_USE_CTABLE)
848   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
849 #else
850   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
851 #endif
852   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
853   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
854   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
855   if (baij->slvec0) {
856     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
857     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
858   }
859   if (baij->slvec1) {
860     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
861     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
862     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
863   }
864   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
865   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
866   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
867   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
868 #if defined(PETSC_USE_MAT_SINGLE)
869   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
870 #endif
871   ierr = PetscFree(baij);CHKERRQ(ierr);
872   PetscFunctionReturn(0);
873 }
874 
875 #undef __FUNCT__
876 #define __FUNCT__ "MatMult_MPISBAIJ"
877 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
878 {
879   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
880   int         ierr,nt,mbs=a->mbs,bs=a->bs;
881   PetscScalar *x,*from,zero=0.0;
882 
883   PetscFunctionBegin;
884   /*
885   PetscSynchronizedPrintf(PETSC_COMM_WORLD," _1comm is called ...\n");
886   PetscSynchronizedFlush(PETSC_COMM_WORLD);
887   */
888   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
889   if (nt != A->n) {
890     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
891   }
892   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
893   if (nt != A->m) {
894     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
895   }
896 
897   /* diagonal part */
898   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
899   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
900 
901   /* subdiagonal part */
902   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
903 
904   /* copy x into the vec slvec0 */
905   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
906   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
907   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
908   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
909 
910   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
911   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
912   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
913 
914   /* supperdiagonal part */
915   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
916 
917   PetscFunctionReturn(0);
918 }
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
922 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
923 {
924   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
925   int         ierr,nt;
926 
927   PetscFunctionBegin;
928   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
929   if (nt != A->n) {
930     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
931   }
932   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
933   if (nt != A->m) {
934     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
935   }
936 
937   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
938   /* do diagonal part */
939   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
940   /* do supperdiagonal part */
941   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
942   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
943   /* do subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
945   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
946   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
947 
948   PetscFunctionReturn(0);
949 }
950 
951 #undef __FUNCT__
952 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
953 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
954 {
955   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
956   int          ierr,mbs=a->mbs,bs=a->bs;
957   PetscScalar  *x,*from,zero=0.0;
958 
959   PetscFunctionBegin;
960   /*
961   PetscSynchronizedPrintf(PETSC_COMM_WORLD," MatMultAdd is called ...\n");
962   PetscSynchronizedFlush(PETSC_COMM_WORLD);
963   */
964   /* diagonal part */
965   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
966   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
967 
968   /* subdiagonal part */
969   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
970 
971   /* copy x into the vec slvec0 */
972   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
973   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
974   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
975   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
976 
977   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
978   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
979   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
980 
981   /* supperdiagonal part */
982   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
983 
984   PetscFunctionReturn(0);
985 }
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
989 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
990 {
991   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
992   int        ierr;
993 
994   PetscFunctionBegin;
995   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
996   /* do diagonal part */
997   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
998   /* do supperdiagonal part */
999   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1000   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1001 
1002   /* do subdiagonal part */
1003   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1004   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1005   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1006 
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1012 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1013 {
1014   PetscFunctionBegin;
1015   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1016   /* PetscFunctionReturn(0); */
1017 }
1018 
1019 #undef __FUNCT__
1020 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1021 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1022 {
1023   PetscFunctionBegin;
1024   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1025   /* PetscFunctionReturn(0); */
1026 }
1027 
1028 /*
1029   This only works correctly for square matrices where the subblock A->A is the
1030    diagonal block
1031 */
1032 #undef __FUNCT__
1033 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1034 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1035 {
1036   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1037   int         ierr;
1038 
1039   PetscFunctionBegin;
1040   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1041   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1042   PetscFunctionReturn(0);
1043 }
1044 
1045 #undef __FUNCT__
1046 #define __FUNCT__ "MatScale_MPISBAIJ"
1047 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
1048 {
1049   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1050   int         ierr;
1051 
1052   PetscFunctionBegin;
1053   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1054   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1055   PetscFunctionReturn(0);
1056 }
1057 
1058 #undef __FUNCT__
1059 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1060 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1061 {
1062   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1063   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1064   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1065   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1066   int            *cmap,*idx_p,cstart = mat->cstart;
1067 
1068   PetscFunctionBegin;
1069   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1070   mat->getrowactive = PETSC_TRUE;
1071 
1072   if (!mat->rowvalues && (idx || v)) {
1073     /*
1074         allocate enough space to hold information from the longest row.
1075     */
1076     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1077     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1078     int     max = 1,mbs = mat->mbs,tmp;
1079     for (i=0; i<mbs; i++) {
1080       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1081       if (max < tmp) { max = tmp; }
1082     }
1083     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1084     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1085   }
1086 
1087   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1088   lrow = row - brstart;  /* local row index */
1089 
1090   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1091   if (!v)   {pvA = 0; pvB = 0;}
1092   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1093   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1094   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1095   nztot = nzA + nzB;
1096 
1097   cmap  = mat->garray;
1098   if (v  || idx) {
1099     if (nztot) {
1100       /* Sort by increasing column numbers, assuming A and B already sorted */
1101       int imark = -1;
1102       if (v) {
1103         *v = v_p = mat->rowvalues;
1104         for (i=0; i<nzB; i++) {
1105           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1106           else break;
1107         }
1108         imark = i;
1109         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1110         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1111       }
1112       if (idx) {
1113         *idx = idx_p = mat->rowindices;
1114         if (imark > -1) {
1115           for (i=0; i<imark; i++) {
1116             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1117           }
1118         } else {
1119           for (i=0; i<nzB; i++) {
1120             if (cmap[cworkB[i]/bs] < cstart)
1121               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1122             else break;
1123           }
1124           imark = i;
1125         }
1126         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1127         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1128       }
1129     } else {
1130       if (idx) *idx = 0;
1131       if (v)   *v   = 0;
1132     }
1133   }
1134   *nz = nztot;
1135   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1136   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1137   PetscFunctionReturn(0);
1138 }
1139 
1140 #undef __FUNCT__
1141 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1142 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1143 {
1144   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1145 
1146   PetscFunctionBegin;
1147   if (baij->getrowactive == PETSC_FALSE) {
1148     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1149   }
1150   baij->getrowactive = PETSC_FALSE;
1151   PetscFunctionReturn(0);
1152 }
1153 
1154 #undef __FUNCT__
1155 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1156 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1157 {
1158   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1159 
1160   PetscFunctionBegin;
1161   *bs = baij->bs;
1162   PetscFunctionReturn(0);
1163 }
1164 
1165 #undef __FUNCT__
1166 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1167 int MatZeroEntries_MPISBAIJ(Mat A)
1168 {
1169   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1170   int         ierr;
1171 
1172   PetscFunctionBegin;
1173   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1174   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1175   PetscFunctionReturn(0);
1176 }
1177 
1178 #undef __FUNCT__
1179 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1180 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1181 {
1182   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1183   Mat         A = a->A,B = a->B;
1184   int         ierr;
1185   PetscReal   isend[5],irecv[5];
1186 
1187   PetscFunctionBegin;
1188   info->block_size     = (PetscReal)a->bs;
1189   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1190   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1191   isend[3] = info->memory;  isend[4] = info->mallocs;
1192   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1193   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1194   isend[3] += info->memory;  isend[4] += info->mallocs;
1195   if (flag == MAT_LOCAL) {
1196     info->nz_used      = isend[0];
1197     info->nz_allocated = isend[1];
1198     info->nz_unneeded  = isend[2];
1199     info->memory       = isend[3];
1200     info->mallocs      = isend[4];
1201   } else if (flag == MAT_GLOBAL_MAX) {
1202     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1203     info->nz_used      = irecv[0];
1204     info->nz_allocated = irecv[1];
1205     info->nz_unneeded  = irecv[2];
1206     info->memory       = irecv[3];
1207     info->mallocs      = irecv[4];
1208   } else if (flag == MAT_GLOBAL_SUM) {
1209     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1210     info->nz_used      = irecv[0];
1211     info->nz_allocated = irecv[1];
1212     info->nz_unneeded  = irecv[2];
1213     info->memory       = irecv[3];
1214     info->mallocs      = irecv[4];
1215   } else {
1216     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1217   }
1218   info->rows_global       = (PetscReal)A->M;
1219   info->columns_global    = (PetscReal)A->N;
1220   info->rows_local        = (PetscReal)A->m;
1221   info->columns_local     = (PetscReal)A->N;
1222   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1223   info->fill_ratio_needed = 0;
1224   info->factor_mallocs    = 0;
1225   PetscFunctionReturn(0);
1226 }
1227 
1228 #undef __FUNCT__
1229 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1230 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1231 {
1232   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1233   int         ierr;
1234 
1235   PetscFunctionBegin;
1236   switch (op) {
1237   case MAT_NO_NEW_NONZERO_LOCATIONS:
1238   case MAT_YES_NEW_NONZERO_LOCATIONS:
1239   case MAT_COLUMNS_UNSORTED:
1240   case MAT_COLUMNS_SORTED:
1241   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1242   case MAT_KEEP_ZEROED_ROWS:
1243   case MAT_NEW_NONZERO_LOCATION_ERR:
1244     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1245     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1246     break;
1247   case MAT_ROW_ORIENTED:
1248     a->roworiented = PETSC_TRUE;
1249     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1250     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1251     break;
1252   case MAT_ROWS_SORTED:
1253   case MAT_ROWS_UNSORTED:
1254   case MAT_YES_NEW_DIAGONALS:
1255   case MAT_USE_SINGLE_PRECISION_SOLVES:
1256     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1257     break;
1258   case MAT_COLUMN_ORIENTED:
1259     a->roworiented = PETSC_FALSE;
1260     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1261     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1262     break;
1263   case MAT_IGNORE_OFF_PROC_ENTRIES:
1264     a->donotstash = PETSC_TRUE;
1265     break;
1266   case MAT_NO_NEW_DIAGONALS:
1267     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1268   case MAT_USE_HASH_TABLE:
1269     a->ht_flag = PETSC_TRUE;
1270     break;
1271   default:
1272     SETERRQ(PETSC_ERR_SUP,"unknown option");
1273   }
1274   PetscFunctionReturn(0);
1275 }
1276 
1277 #undef __FUNCT__
1278 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1279 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1280 {
1281   int ierr;
1282   PetscFunctionBegin;
1283   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1289 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1290 {
1291   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1292   Mat         a = baij->A,b = baij->B;
1293   int         ierr,s1,s2,s3;
1294 
1295   PetscFunctionBegin;
1296   if (ll != rr) {
1297     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1298   }
1299   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1300   if (rr) {
1301     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1302     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1303     /* Overlap communication with computation. */
1304     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1305     /*} if (ll) { */
1306     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1307     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1308     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1309     /* } */
1310   /* scale  the diagonal block */
1311   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1312 
1313   /* if (rr) { */
1314     /* Do a scatter end and then right scale the off-diagonal block */
1315     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1316     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1317   }
1318 
1319   PetscFunctionReturn(0);
1320 }
1321 
1322 #undef __FUNCT__
1323 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1324 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
1325 {
1326   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1327   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1328   int            *procs,*nprocs,j,idx,nsends,*work,row;
1329   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1330   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1331   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1332   MPI_Comm       comm = A->comm;
1333   MPI_Request    *send_waits,*recv_waits;
1334   MPI_Status     recv_status,*send_status;
1335   IS             istmp;
1336   PetscTruth     found;
1337 
1338   PetscFunctionBegin;
1339   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1340   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1341 
1342   /*  first count number of contributors to each processor */
1343   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1344   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1345   procs = nprocs + size;
1346   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1347   for (i=0; i<N; i++) {
1348     idx   = rows[i];
1349     found = PETSC_FALSE;
1350     for (j=0; j<size; j++) {
1351       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1352         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1353       }
1354     }
1355     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1356   }
1357   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1358 
1359   /* inform other processors of number of messages and max length*/
1360   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
1361   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1362   nmax   = work[rank];
1363   nrecvs = work[size+rank];
1364   ierr   = PetscFree(work);CHKERRQ(ierr);
1365 
1366   /* post receives:   */
1367   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1368   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1369   for (i=0; i<nrecvs; i++) {
1370     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1371   }
1372 
1373   /* do sends:
1374      1) starts[i] gives the starting index in svalues for stuff going to
1375      the ith processor
1376   */
1377   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1378   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1379   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1380   starts[0]  = 0;
1381   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1382   for (i=0; i<N; i++) {
1383     svalues[starts[owner[i]]++] = rows[i];
1384   }
1385   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1386 
1387   starts[0] = 0;
1388   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1389   count = 0;
1390   for (i=0; i<size; i++) {
1391     if (procs[i]) {
1392       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1393     }
1394   }
1395   ierr = PetscFree(starts);CHKERRQ(ierr);
1396 
1397   base = owners[rank]*bs;
1398 
1399   /*  wait on receives */
1400   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1401   source = lens + nrecvs;
1402   count  = nrecvs; slen = 0;
1403   while (count) {
1404     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1405     /* unpack receives into our local space */
1406     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1407     source[imdex]  = recv_status.MPI_SOURCE;
1408     lens[imdex]    = n;
1409     slen          += n;
1410     count--;
1411   }
1412   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1413 
1414   /* move the data into the send scatter */
1415   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1416   count = 0;
1417   for (i=0; i<nrecvs; i++) {
1418     values = rvalues + i*nmax;
1419     for (j=0; j<lens[i]; j++) {
1420       lrows[count++] = values[j] - base;
1421     }
1422   }
1423   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1424   ierr = PetscFree(lens);CHKERRQ(ierr);
1425   ierr = PetscFree(owner);CHKERRQ(ierr);
1426   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1427 
1428   /* actually zap the local rows */
1429   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1430   PetscLogObjectParent(A,istmp);
1431 
1432   /*
1433         Zero the required rows. If the "diagonal block" of the matrix
1434      is square and the user wishes to set the diagonal we use seperate
1435      code so that MatSetValues() is not called for each diagonal allocating
1436      new memory, thus calling lots of mallocs and slowing things down.
1437 
1438        Contributed by: Mathew Knepley
1439   */
1440   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1441   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1442   if (diag && (l->A->M == l->A->N)) {
1443     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1444   } else if (diag) {
1445     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1446     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1447       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1448 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1449     }
1450     for (i=0; i<slen; i++) {
1451       row  = lrows[i] + rstart_bs;
1452       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1453     }
1454     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1455     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1456   } else {
1457     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1458   }
1459 
1460   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1461   ierr = PetscFree(lrows);CHKERRQ(ierr);
1462 
1463   /* wait on sends */
1464   if (nsends) {
1465     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1466     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1467     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1468   }
1469   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1470   ierr = PetscFree(svalues);CHKERRQ(ierr);
1471 
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1477 int MatPrintHelp_MPISBAIJ(Mat A)
1478 {
1479   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1480   MPI_Comm    comm = A->comm;
1481   static int  called = 0;
1482   int         ierr;
1483 
1484   PetscFunctionBegin;
1485   if (!a->rank) {
1486     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1487   }
1488   if (called) {PetscFunctionReturn(0);} else called = 1;
1489   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1490   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1491   PetscFunctionReturn(0);
1492 }
1493 
1494 #undef __FUNCT__
1495 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1496 int MatSetUnfactored_MPISBAIJ(Mat A)
1497 {
1498   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1499   int         ierr;
1500 
1501   PetscFunctionBegin;
1502   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1503   PetscFunctionReturn(0);
1504 }
1505 
1506 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1507 
1508 #undef __FUNCT__
1509 #define __FUNCT__ "MatEqual_MPISBAIJ"
1510 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1511 {
1512   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1513   Mat         a,b,c,d;
1514   PetscTruth  flg;
1515   int         ierr;
1516 
1517   PetscFunctionBegin;
1518   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
1519   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1520   a = matA->A; b = matA->B;
1521   c = matB->A; d = matB->B;
1522 
1523   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1524   if (flg == PETSC_TRUE) {
1525     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1526   }
1527   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1528   PetscFunctionReturn(0);
1529 }
1530 
1531 #undef __FUNCT__
1532 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1533 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1534 {
1535   int        ierr;
1536 
1537   PetscFunctionBegin;
1538   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1539   PetscFunctionReturn(0);
1540 }
1541 /* -------------------------------------------------------------------*/
1542 static struct _MatOps MatOps_Values = {
1543   MatSetValues_MPISBAIJ,
1544   MatGetRow_MPISBAIJ,
1545   MatRestoreRow_MPISBAIJ,
1546   MatMult_MPISBAIJ,
1547   MatMultAdd_MPISBAIJ,
1548   MatMultTranspose_MPISBAIJ,
1549   MatMultTransposeAdd_MPISBAIJ,
1550   0,
1551   0,
1552   0,
1553   0,
1554   0,
1555   0,
1556   MatRelax_MPISBAIJ,
1557   MatTranspose_MPISBAIJ,
1558   MatGetInfo_MPISBAIJ,
1559   MatEqual_MPISBAIJ,
1560   MatGetDiagonal_MPISBAIJ,
1561   MatDiagonalScale_MPISBAIJ,
1562   MatNorm_MPISBAIJ,
1563   MatAssemblyBegin_MPISBAIJ,
1564   MatAssemblyEnd_MPISBAIJ,
1565   0,
1566   MatSetOption_MPISBAIJ,
1567   MatZeroEntries_MPISBAIJ,
1568   MatZeroRows_MPISBAIJ,
1569   0,
1570   0,
1571   0,
1572   0,
1573   MatSetUpPreallocation_MPISBAIJ,
1574   0,
1575   0,
1576   0,
1577   0,
1578   MatDuplicate_MPISBAIJ,
1579   0,
1580   0,
1581   0,
1582   0,
1583   0,
1584   MatGetSubMatrices_MPISBAIJ,
1585   MatIncreaseOverlap_MPISBAIJ,
1586   MatGetValues_MPISBAIJ,
1587   0,
1588   MatPrintHelp_MPISBAIJ,
1589   MatScale_MPISBAIJ,
1590   0,
1591   0,
1592   0,
1593   MatGetBlockSize_MPISBAIJ,
1594   0,
1595   0,
1596   0,
1597   0,
1598   0,
1599   0,
1600   MatSetUnfactored_MPISBAIJ,
1601   0,
1602   MatSetValuesBlocked_MPISBAIJ,
1603   0,
1604   0,
1605   0,
1606   MatGetPetscMaps_Petsc,
1607   0,
1608   0,
1609   0,
1610   0,
1611   0,
1612   0,
1613   MatGetRowMax_MPISBAIJ};
1614 
1615 
1616 EXTERN_C_BEGIN
1617 #undef __FUNCT__
1618 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1619 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1620 {
1621   PetscFunctionBegin;
1622   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1623   *iscopy = PETSC_FALSE;
1624   PetscFunctionReturn(0);
1625 }
1626 EXTERN_C_END
1627 
1628 EXTERN_C_BEGIN
1629 #undef __FUNCT__
1630 #define __FUNCT__ "MatCreate_MPISBAIJ"
1631 int MatCreate_MPISBAIJ(Mat B)
1632 {
1633   Mat_MPISBAIJ *b;
1634   int          ierr;
1635   PetscTruth   flg;
1636 
1637   PetscFunctionBegin;
1638 
1639   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1640   B->data = (void*)b;
1641   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1642   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1643 
1644   B->ops->destroy    = MatDestroy_MPISBAIJ;
1645   B->ops->view       = MatView_MPISBAIJ;
1646   B->mapping    = 0;
1647   B->factor     = 0;
1648   B->assembled  = PETSC_FALSE;
1649 
1650   B->insertmode = NOT_SET_VALUES;
1651   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1652   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1653 
1654   /* build local table of row and column ownerships */
1655   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1656   b->cowners    = b->rowners + b->size + 2;
1657   b->rowners_bs = b->cowners + b->size + 2;
1658   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1659 
1660   /* build cache for off array entries formed */
1661   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1662   b->donotstash  = PETSC_FALSE;
1663   b->colmap      = PETSC_NULL;
1664   b->garray      = PETSC_NULL;
1665   b->roworiented = PETSC_TRUE;
1666 
1667 #if defined(PETSC_USE_MAT_SINGLE)
1668   /* stuff for MatSetValues_XXX in single precision */
1669   b->setvalueslen     = 0;
1670   b->setvaluescopy    = PETSC_NULL;
1671 #endif
1672 
1673   /* stuff used in block assembly */
1674   b->barray       = 0;
1675 
1676   /* stuff used for matrix vector multiply */
1677   b->lvec         = 0;
1678   b->Mvctx        = 0;
1679   b->slvec0       = 0;
1680   b->slvec0b      = 0;
1681   b->slvec1       = 0;
1682   b->slvec1a      = 0;
1683   b->slvec1b      = 0;
1684   b->sMvctx       = 0;
1685 
1686   /* stuff for MatGetRow() */
1687   b->rowindices   = 0;
1688   b->rowvalues    = 0;
1689   b->getrowactive = PETSC_FALSE;
1690 
1691   /* hash table stuff */
1692   b->ht           = 0;
1693   b->hd           = 0;
1694   b->ht_size      = 0;
1695   b->ht_flag      = PETSC_FALSE;
1696   b->ht_fact      = 0;
1697   b->ht_total_ct  = 0;
1698   b->ht_insert_ct = 0;
1699 
1700   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1701   if (flg) {
1702     PetscReal fact = 1.39;
1703     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1704     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1705     if (fact <= 1.0) fact = 1.39;
1706     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1707     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1708   }
1709   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1710                                      "MatStoreValues_MPISBAIJ",
1711                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1712   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1713                                      "MatRetrieveValues_MPISBAIJ",
1714                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1715   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1716                                      "MatGetDiagonalBlock_MPISBAIJ",
1717                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1718   PetscFunctionReturn(0);
1719 }
1720 EXTERN_C_END
1721 
1722 #undef __FUNCT__
1723 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1724 /*@C
1725    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1726    the user should preallocate the matrix storage by setting the parameters
1727    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1728    performance can be increased by more than a factor of 50.
1729 
1730    Collective on Mat
1731 
1732    Input Parameters:
1733 +  A - the matrix
1734 .  bs   - size of blockk
1735 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1736            submatrix  (same for all local rows)
1737 .  d_nnz - array containing the number of block nonzeros in the various block rows
1738            in the upper triangular and diagonal part of the in diagonal portion of the local
1739            (possibly different for each block row) or PETSC_NULL.  You must leave room
1740            for the diagonal entry even if it is zero.
1741 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1742            submatrix (same for all local rows).
1743 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1744            off-diagonal portion of the local submatrix (possibly different for
1745            each block row) or PETSC_NULL.
1746 
1747 
1748    Options Database Keys:
1749 .   -mat_no_unroll - uses code that does not unroll the loops in the
1750                      block calculations (much slower)
1751 .   -mat_block_size - size of the blocks to use
1752 
1753    Notes:
1754 
1755    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1756    than it must be used on all processors that share the object for that argument.
1757 
1758    Storage Information:
1759    For a square global matrix we define each processor's diagonal portion
1760    to be its local rows and the corresponding columns (a square submatrix);
1761    each processor's off-diagonal portion encompasses the remainder of the
1762    local matrix (a rectangular submatrix).
1763 
1764    The user can specify preallocated storage for the diagonal part of
1765    the local submatrix with either d_nz or d_nnz (not both).  Set
1766    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1767    memory allocation.  Likewise, specify preallocated storage for the
1768    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1769 
1770    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1771    the figure below we depict these three local rows and all columns (0-11).
1772 
1773 .vb
1774            0 1 2 3 4 5 6 7 8 9 10 11
1775           -------------------
1776    row 3  |  o o o d d d o o o o o o
1777    row 4  |  o o o d d d o o o o o o
1778    row 5  |  o o o d d d o o o o o o
1779           -------------------
1780 .ve
1781 
1782    Thus, any entries in the d locations are stored in the d (diagonal)
1783    submatrix, and any entries in the o locations are stored in the
1784    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1785    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1786 
1787    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1788    plus the diagonal part of the d matrix,
1789    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1790    In general, for PDE problems in which most nonzeros are near the diagonal,
1791    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1792    or you will get TERRIBLE performance; see the users' manual chapter on
1793    matrices.
1794 
1795    Level: intermediate
1796 
1797 .keywords: matrix, block, aij, compressed row, sparse, parallel
1798 
1799 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1800 @*/
1801 
1802 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1803 {
1804   Mat_MPISBAIJ *b;
1805   int          ierr,i,mbs,Mbs;
1806   PetscTruth   flg2;
1807 
1808   PetscFunctionBegin;
1809   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
1810   if (!flg2) PetscFunctionReturn(0);
1811 
1812   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1813 
1814   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1815   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1816   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1817   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1818   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1819   if (d_nnz) {
1820     for (i=0; i<B->m/bs; i++) {
1821       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1822     }
1823   }
1824   if (o_nnz) {
1825     for (i=0; i<B->m/bs; i++) {
1826       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1827     }
1828   }
1829   B->preallocated = PETSC_TRUE;
1830   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1831   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1832   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1833   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1834 
1835   b   = (Mat_MPISBAIJ*)B->data;
1836   mbs = B->m/bs;
1837   Mbs = B->M/bs;
1838   if (mbs*bs != B->m) {
1839     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1840   }
1841 
1842   b->bs  = bs;
1843   b->bs2 = bs*bs;
1844   b->mbs = mbs;
1845   b->nbs = mbs;
1846   b->Mbs = Mbs;
1847   b->Nbs = Mbs;
1848 
1849   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1850   b->rowners[0]    = 0;
1851   for (i=2; i<=b->size; i++) {
1852     b->rowners[i] += b->rowners[i-1];
1853   }
1854   b->rstart    = b->rowners[b->rank];
1855   b->rend      = b->rowners[b->rank+1];
1856   b->cstart    = b->rstart;
1857   b->cend      = b->rend;
1858   for (i=0; i<=b->size; i++) {
1859     b->rowners_bs[i] = b->rowners[i]*bs;
1860   }
1861   b->rstart_bs = b-> rstart*bs;
1862   b->rend_bs   = b->rend*bs;
1863 
1864   b->cstart_bs = b->cstart*bs;
1865   b->cend_bs   = b->cend*bs;
1866 
1867 
1868   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1869   PetscLogObjectParent(B,b->A);
1870   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1871   PetscLogObjectParent(B,b->B);
1872 
1873   /* build cache for off array entries formed */
1874   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1875 
1876   PetscFunctionReturn(0);
1877 }
1878 
1879 #undef __FUNCT__
1880 #define __FUNCT__ "MatCreateMPISBAIJ"
1881 /*@C
1882    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1883    (block compressed row).  For good matrix assembly performance
1884    the user should preallocate the matrix storage by setting the parameters
1885    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1886    performance can be increased by more than a factor of 50.
1887 
1888    Collective on MPI_Comm
1889 
1890    Input Parameters:
1891 +  comm - MPI communicator
1892 .  bs   - size of blockk
1893 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1894            This value should be the same as the local size used in creating the
1895            y vector for the matrix-vector product y = Ax.
1896 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1897            This value should be the same as the local size used in creating the
1898            x vector for the matrix-vector product y = Ax.
1899 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1900 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1901 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1902            submatrix  (same for all local rows)
1903 .  d_nnz - array containing the number of block nonzeros in the various block rows
1904            in the upper triangular portion of the in diagonal portion of the local
1905            (possibly different for each block block row) or PETSC_NULL.
1906            You must leave room for the diagonal entry even if it is zero.
1907 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1908            submatrix (same for all local rows).
1909 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1910            off-diagonal portion of the local submatrix (possibly different for
1911            each block row) or PETSC_NULL.
1912 
1913    Output Parameter:
1914 .  A - the matrix
1915 
1916    Options Database Keys:
1917 .   -mat_no_unroll - uses code that does not unroll the loops in the
1918                      block calculations (much slower)
1919 .   -mat_block_size - size of the blocks to use
1920 .   -mat_mpi - use the parallel matrix data structures even on one processor
1921                (defaults to using SeqBAIJ format on one processor)
1922 
1923    Notes:
1924    The user MUST specify either the local or global matrix dimensions
1925    (possibly both).
1926 
1927    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1928    than it must be used on all processors that share the object for that argument.
1929 
1930    Storage Information:
1931    For a square global matrix we define each processor's diagonal portion
1932    to be its local rows and the corresponding columns (a square submatrix);
1933    each processor's off-diagonal portion encompasses the remainder of the
1934    local matrix (a rectangular submatrix).
1935 
1936    The user can specify preallocated storage for the diagonal part of
1937    the local submatrix with either d_nz or d_nnz (not both).  Set
1938    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1939    memory allocation.  Likewise, specify preallocated storage for the
1940    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1941 
1942    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1943    the figure below we depict these three local rows and all columns (0-11).
1944 
1945 .vb
1946            0 1 2 3 4 5 6 7 8 9 10 11
1947           -------------------
1948    row 3  |  o o o d d d o o o o o o
1949    row 4  |  o o o d d d o o o o o o
1950    row 5  |  o o o d d d o o o o o o
1951           -------------------
1952 .ve
1953 
1954    Thus, any entries in the d locations are stored in the d (diagonal)
1955    submatrix, and any entries in the o locations are stored in the
1956    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1957    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1958 
1959    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1960    plus the diagonal part of the d matrix,
1961    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1962    In general, for PDE problems in which most nonzeros are near the diagonal,
1963    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1964    or you will get TERRIBLE performance; see the users' manual chapter on
1965    matrices.
1966 
1967    Level: intermediate
1968 
1969 .keywords: matrix, block, aij, compressed row, sparse, parallel
1970 
1971 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1972 @*/
1973 
1974 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1975 {
1976   int ierr,size;
1977 
1978   PetscFunctionBegin;
1979   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1980   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1981   if (size > 1) {
1982     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1983     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1984   } else {
1985     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1986     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1987   }
1988   PetscFunctionReturn(0);
1989 }
1990 
1991 
1992 #undef __FUNCT__
1993 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1994 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1995 {
1996   Mat          mat;
1997   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1998   int          ierr,len=0;
1999 
2000   PetscFunctionBegin;
2001   *newmat       = 0;
2002   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2003   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2004   mat->preallocated = PETSC_TRUE;
2005   a = (Mat_MPISBAIJ*)mat->data;
2006   a->bs  = oldmat->bs;
2007   a->bs2 = oldmat->bs2;
2008   a->mbs = oldmat->mbs;
2009   a->nbs = oldmat->nbs;
2010   a->Mbs = oldmat->Mbs;
2011   a->Nbs = oldmat->Nbs;
2012 
2013   a->rstart       = oldmat->rstart;
2014   a->rend         = oldmat->rend;
2015   a->cstart       = oldmat->cstart;
2016   a->cend         = oldmat->cend;
2017   a->size         = oldmat->size;
2018   a->rank         = oldmat->rank;
2019   a->donotstash   = oldmat->donotstash;
2020   a->roworiented  = oldmat->roworiented;
2021   a->rowindices   = 0;
2022   a->rowvalues    = 0;
2023   a->getrowactive = PETSC_FALSE;
2024   a->barray       = 0;
2025   a->rstart_bs    = oldmat->rstart_bs;
2026   a->rend_bs      = oldmat->rend_bs;
2027   a->cstart_bs    = oldmat->cstart_bs;
2028   a->cend_bs      = oldmat->cend_bs;
2029 
2030   /* hash table stuff */
2031   a->ht           = 0;
2032   a->hd           = 0;
2033   a->ht_size      = 0;
2034   a->ht_flag      = oldmat->ht_flag;
2035   a->ht_fact      = oldmat->ht_fact;
2036   a->ht_total_ct  = 0;
2037   a->ht_insert_ct = 0;
2038 
2039   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
2040   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
2041   a->cowners    = a->rowners + a->size + 2;
2042   a->rowners_bs = a->cowners + a->size + 2;
2043   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2044   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2045   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2046   if (oldmat->colmap) {
2047 #if defined (PETSC_USE_CTABLE)
2048     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2049 #else
2050     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2051     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2052     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2053 #endif
2054   } else a->colmap = 0;
2055   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2056     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2057     PetscLogObjectMemory(mat,len*sizeof(int));
2058     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2059   } else a->garray = 0;
2060 
2061   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2062   PetscLogObjectParent(mat,a->lvec);
2063   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2064 
2065   PetscLogObjectParent(mat,a->Mvctx);
2066   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2067   PetscLogObjectParent(mat,a->A);
2068   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2069   PetscLogObjectParent(mat,a->B);
2070   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2071   *newmat = mat;
2072   PetscFunctionReturn(0);
2073 }
2074 
2075 #include "petscsys.h"
2076 
2077 EXTERN_C_BEGIN
2078 #undef __FUNCT__
2079 #define __FUNCT__ "MatLoad_MPISBAIJ"
2080 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2081 {
2082   Mat          A;
2083   int          i,nz,ierr,j,rstart,rend,fd;
2084   PetscScalar  *vals,*buf;
2085   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2086   MPI_Status   status;
2087   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2088   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2089   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2090   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2091   int          dcount,kmax,k,nzcount,tmp;
2092 
2093   PetscFunctionBegin;
2094   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2095 
2096   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2097   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2098   if (!rank) {
2099     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2100     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2101     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2102     if (header[3] < 0) {
2103       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2104     }
2105   }
2106 
2107   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2108   M = header[1]; N = header[2];
2109 
2110   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2111 
2112   /*
2113      This code adds extra rows to make sure the number of rows is
2114      divisible by the blocksize
2115   */
2116   Mbs        = M/bs;
2117   extra_rows = bs - M + bs*(Mbs);
2118   if (extra_rows == bs) extra_rows = 0;
2119   else                  Mbs++;
2120   if (extra_rows &&!rank) {
2121     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2122   }
2123 
2124   /* determine ownership of all rows */
2125   mbs        = Mbs/size + ((Mbs % size) > rank);
2126   m          = mbs*bs;
2127   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2128   browners   = rowners + size + 1;
2129   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2130   rowners[0] = 0;
2131   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2132   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2133   rstart = rowners[rank];
2134   rend   = rowners[rank+1];
2135 
2136   /* distribute row lengths to all processors */
2137   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2138   if (!rank) {
2139     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2140     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2141     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2142     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2143     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2144     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2145     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2146   } else {
2147     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2148   }
2149 
2150   if (!rank) {   /* procs[0] */
2151     /* calculate the number of nonzeros on each processor */
2152     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2153     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2154     for (i=0; i<size; i++) {
2155       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2156         procsnz[i] += rowlengths[j];
2157       }
2158     }
2159     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2160 
2161     /* determine max buffer needed and allocate it */
2162     maxnz = 0;
2163     for (i=0; i<size; i++) {
2164       maxnz = PetscMax(maxnz,procsnz[i]);
2165     }
2166     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2167 
2168     /* read in my part of the matrix column indices  */
2169     nz     = procsnz[0];
2170     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2171     mycols = ibuf;
2172     if (size == 1)  nz -= extra_rows;
2173     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2174     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2175 
2176     /* read in every ones (except the last) and ship off */
2177     for (i=1; i<size-1; i++) {
2178       nz   = procsnz[i];
2179       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2180       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2181     }
2182     /* read in the stuff for the last proc */
2183     if (size != 1) {
2184       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2185       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2186       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2187       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2188     }
2189     ierr = PetscFree(cols);CHKERRQ(ierr);
2190   } else {  /* procs[i], i>0 */
2191     /* determine buffer space needed for message */
2192     nz = 0;
2193     for (i=0; i<m; i++) {
2194       nz += locrowlens[i];
2195     }
2196     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2197     mycols = ibuf;
2198     /* receive message of column indices*/
2199     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2200     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2201     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2202   }
2203 
2204   /* loop over local rows, determining number of off diagonal entries */
2205   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2206   odlens   = dlens + (rend-rstart);
2207   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2208   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2209   masked1  = mask    + Mbs;
2210   masked2  = masked1 + Mbs;
2211   rowcount = 0; nzcount = 0;
2212   for (i=0; i<mbs; i++) {
2213     dcount  = 0;
2214     odcount = 0;
2215     for (j=0; j<bs; j++) {
2216       kmax = locrowlens[rowcount];
2217       for (k=0; k<kmax; k++) {
2218         tmp = mycols[nzcount++]/bs; /* block col. index */
2219         if (!mask[tmp]) {
2220           mask[tmp] = 1;
2221           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2222           else masked1[dcount++] = tmp; /* entry in diag portion */
2223         }
2224       }
2225       rowcount++;
2226     }
2227 
2228     dlens[i]  = dcount;  /* d_nzz[i] */
2229     odlens[i] = odcount; /* o_nzz[i] */
2230 
2231     /* zero out the mask elements we set */
2232     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2233     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2234   }
2235 
2236   /* create our matrix */
2237   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2238   CHKERRQ(ierr);
2239   A = *newmat;
2240   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2241 
2242   if (!rank) {
2243     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2244     /* read in my part of the matrix numerical values  */
2245     nz = procsnz[0];
2246     vals = buf;
2247     mycols = ibuf;
2248     if (size == 1)  nz -= extra_rows;
2249     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2250     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2251 
2252     /* insert into matrix */
2253     jj      = rstart*bs;
2254     for (i=0; i<m; i++) {
2255       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2256       mycols += locrowlens[i];
2257       vals   += locrowlens[i];
2258       jj++;
2259     }
2260 
2261     /* read in other processors (except the last one) and ship out */
2262     for (i=1; i<size-1; i++) {
2263       nz   = procsnz[i];
2264       vals = buf;
2265       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2266       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2267     }
2268     /* the last proc */
2269     if (size != 1){
2270       nz   = procsnz[i] - extra_rows;
2271       vals = buf;
2272       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2273       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2274       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2275     }
2276     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2277 
2278   } else {
2279     /* receive numeric values */
2280     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2281 
2282     /* receive message of values*/
2283     vals   = buf;
2284     mycols = ibuf;
2285     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2286     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2287     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2288 
2289     /* insert into matrix */
2290     jj      = rstart*bs;
2291     for (i=0; i<m; i++) {
2292       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2293       mycols += locrowlens[i];
2294       vals   += locrowlens[i];
2295       jj++;
2296     }
2297   }
2298 
2299   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2300   ierr = PetscFree(buf);CHKERRQ(ierr);
2301   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2302   ierr = PetscFree(rowners);CHKERRQ(ierr);
2303   ierr = PetscFree(dlens);CHKERRQ(ierr);
2304   ierr = PetscFree(mask);CHKERRQ(ierr);
2305   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2306   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2307   PetscFunctionReturn(0);
2308 }
2309 EXTERN_C_END
2310 
2311 #undef __FUNCT__
2312 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2313 /*@
2314    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2315 
2316    Input Parameters:
2317 .  mat  - the matrix
2318 .  fact - factor
2319 
2320    Collective on Mat
2321 
2322    Level: advanced
2323 
2324   Notes:
2325    This can also be set by the command line option: -mat_use_hash_table fact
2326 
2327 .keywords: matrix, hashtable, factor, HT
2328 
2329 .seealso: MatSetOption()
2330 @*/
2331 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2332 {
2333   PetscFunctionBegin;
2334   SETERRQ(1,"Function not yet written for SBAIJ format");
2335   /* PetscFunctionReturn(0); */
2336 }
2337 
2338 #undef __FUNCT__
2339 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2340 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2341 {
2342   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2343   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2344   PetscReal    atmp;
2345   PetscReal    *work,*svalues,*rvalues;
2346   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2347   int          rank,size,*rowners_bs,dest,count,source;
2348   PetscScalar  *va;
2349   MatScalar    *ba;
2350   MPI_Status   stat;
2351 
2352   PetscFunctionBegin;
2353   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2354   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2355 
2356   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
2357   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
2358 
2359   bs   = a->bs;
2360   mbs  = a->mbs;
2361   Mbs  = a->Mbs;
2362   ba   = b->a;
2363   bi   = b->i;
2364   bj   = b->j;
2365   /*
2366   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2367   PetscSynchronizedFlush(PETSC_COMM_WORLD);
2368   */
2369 
2370   /* find ownerships */
2371   rowners_bs = a->rowners_bs;
2372   /*
2373   if (!rank){
2374     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2375   }
2376   */
2377 
2378   /* each proc creates an array to be distributed */
2379   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2380   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2381 
2382   /* row_max for B */
2383   if (rank != size-1){
2384     for (i=0; i<mbs; i++) {
2385       ncols = bi[1] - bi[0]; bi++;
2386       brow  = bs*i;
2387       for (j=0; j<ncols; j++){
2388         bcol = bs*(*bj);
2389         for (kcol=0; kcol<bs; kcol++){
2390           col = bcol + kcol;                 /* local col index */
2391           col += rowners_bs[rank+1];      /* global col index */
2392           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2393           for (krow=0; krow<bs; krow++){
2394             atmp = PetscAbsScalar(*ba); ba++;
2395             row = brow + krow;    /* local row index */
2396             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2397             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2398             if (work[col] < atmp) work[col] = atmp;
2399           }
2400         }
2401         bj++;
2402       }
2403     }
2404     /*
2405       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2406       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2407       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2408       */
2409 
2410     /* send values to its owners */
2411     for (dest=rank+1; dest<size; dest++){
2412       svalues = work + rowners_bs[dest];
2413       count   = rowners_bs[dest+1]-rowners_bs[dest];
2414       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr);
2415       /*
2416       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2417       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2418       */
2419     }
2420   }
2421 
2422   /* receive values */
2423   if (rank){
2424     rvalues = work;
2425     count   = rowners_bs[rank+1]-rowners_bs[rank];
2426     for (source=0; source<rank; source++){
2427       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr);
2428       /* process values */
2429       for (i=0; i<count; i++){
2430         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2431       }
2432       /*
2433       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2434       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2435       */
2436     }
2437   }
2438 
2439   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2440   ierr = PetscFree(work);CHKERRQ(ierr);
2441   PetscFunctionReturn(0);
2442 }
2443 
2444 #undef __FUNCT__
2445 #define __FUNCT__ "MatRelax_MPISBAIJ"
2446 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2447 {
2448   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2449   int            ierr,mbs=mat->mbs,bs=mat->bs;
2450   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2451   Vec            bb1;
2452 
2453   PetscFunctionBegin;
2454   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2455   if (bs > 1)
2456     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2457 
2458   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2459     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2460       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2461       its--;
2462     }
2463 
2464     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2465     while (its--){
2466 
2467       /* lower triangular part: slvec0b = - B^T*xx */
2468       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2469 
2470       /* copy xx into slvec0a */
2471       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2472       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2473       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2474       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2475 
2476       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2477 
2478       /* copy bb into slvec1a */
2479       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2480       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2481       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2482       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2483 
2484       /* set slvec1b = 0 */
2485       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2486 
2487       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2488       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2489       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2490       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2491 
2492       /* upper triangular part: bb1 = bb1 - B*x */
2493       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2494 
2495       /* local diagonal sweep */
2496       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2497     }
2498     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2499   } else {
2500     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2501   }
2502   PetscFunctionReturn(0);
2503 }
2504 
2505 #undef __FUNCT__
2506 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2507 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2508 {
2509   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2510   int            ierr;
2511   PetscScalar    mone=-1.0;
2512   Vec            lvec1,bb1;
2513 
2514   PetscFunctionBegin;
2515   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2516   if (mat->bs > 1)
2517     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2518 
2519   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2520     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2521       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2522       its--;
2523     }
2524 
2525     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2526     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2527     while (its--){
2528       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2529 
2530       /* lower diagonal part: bb1 = bb - B^T*xx */
2531       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2532       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2533 
2534       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2535       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2536       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2537 
2538       /* upper diagonal part: bb1 = bb1 - B*x */
2539       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2540       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2541 
2542       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2543 
2544       /* diagonal sweep */
2545       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2546     }
2547     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2548     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2549   } else {
2550     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2551   }
2552   PetscFunctionReturn(0);
2553 }
2554 
2555