xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 40781036cb61568352ccff04d60a627167f497bd)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS *,int);
12 extern int MatGetSubMatrices_MPISBAIJ(Mat,int,IS *,IS *,MatReuse,Mat **);
13 extern int MatGetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *);
14 extern int MatSetValues_SeqSBAIJ(Mat,int,int *,int,int *,PetscScalar *,InsertMode);
15 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,int*,int,int*,PetscScalar*,InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,int*,int,int*,MatScalar*,InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 /*
76      Local utility routine that creates a mapping from the global column
77    number to the local number in the off-diagonal part of the local
78    storage of the matrix.  This is done in a non scable way since the
79    length of colmap equals the global matrix length.
80 */
81 #undef __FUNCT__
82 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
83 static int CreateColmap_MPISBAIJ_Private(Mat mat)
84 {
85   PetscFunctionBegin;
86   SETERRQ(1,"Function not yet written for SBAIJ format");
87   /* PetscFunctionReturn(0); */
88 }
89 
90 #define CHUNKSIZE  10
91 
92 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
93 { \
94  \
95     brow = row/bs;  \
96     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
97     rmax = aimax[brow]; nrow = ailen[brow]; \
98       bcol = col/bs; \
99       ridx = row % bs; cidx = col % bs; \
100       low = 0; high = nrow; \
101       while (high-low > 3) { \
102         t = (low+high)/2; \
103         if (rp[t] > bcol) high = t; \
104         else              low  = t; \
105       } \
106       for (_i=low; _i<high; _i++) { \
107         if (rp[_i] > bcol) break; \
108         if (rp[_i] == bcol) { \
109           bap  = ap +  bs2*_i + bs*cidx + ridx; \
110           if (addv == ADD_VALUES) *bap += value;  \
111           else                    *bap  = value;  \
112           goto a_noinsert; \
113         } \
114       } \
115       if (a->nonew == 1) goto a_noinsert; \
116       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
117       if (nrow >= rmax) { \
118         /* there is no extra room in row, therefore enlarge */ \
119         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
120         MatScalar *new_a; \
121  \
122         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
123  \
124         /* malloc new storage space */ \
125         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
126         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
127         new_j = (int*)(new_a + bs2*new_nz); \
128         new_i = new_j + new_nz; \
129  \
130         /* copy over old data into new slots */ \
131         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
132         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
133         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
134         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
135         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
137         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
138         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
139                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
140         /* free up old matrix storage */ \
141         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
142         if (!a->singlemalloc) { \
143           ierr = PetscFree(a->i);CHKERRQ(ierr); \
144           ierr = PetscFree(a->j);CHKERRQ(ierr);\
145         } \
146         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
147         a->singlemalloc = PETSC_TRUE; \
148  \
149         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
150         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
151         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
152         a->s_maxnz += bs2*CHUNKSIZE; \
153         a->reallocs++; \
154         a->s_nz++; \
155       } \
156       N = nrow++ - 1;  \
157       /* shift up all the later entries in this row */ \
158       for (ii=N; ii>=_i; ii--) { \
159         rp[ii+1] = rp[ii]; \
160         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
161       } \
162       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
163       rp[_i]                      = bcol;  \
164       ap[bs2*_i + bs*cidx + ridx] = value;  \
165       a_noinsert:; \
166     ailen[brow] = nrow; \
167 }
168 #ifndef MatSetValues_SeqBAIJ_B_Private
169 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
170 { \
171     brow = row/bs;  \
172     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
173     rmax = bimax[brow]; nrow = bilen[brow]; \
174       bcol = col/bs; \
175       ridx = row % bs; cidx = col % bs; \
176       low = 0; high = nrow; \
177       while (high-low > 3) { \
178         t = (low+high)/2; \
179         if (rp[t] > bcol) high = t; \
180         else              low  = t; \
181       } \
182       for (_i=low; _i<high; _i++) { \
183         if (rp[_i] > bcol) break; \
184         if (rp[_i] == bcol) { \
185           bap  = ap +  bs2*_i + bs*cidx + ridx; \
186           if (addv == ADD_VALUES) *bap += value;  \
187           else                    *bap  = value;  \
188           goto b_noinsert; \
189         } \
190       } \
191       if (b->nonew == 1) goto b_noinsert; \
192       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
193       if (nrow >= rmax) { \
194         /* there is no extra room in row, therefore enlarge */ \
195         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
196         MatScalar *new_a; \
197  \
198         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
199  \
200         /* malloc new storage space */ \
201         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
202         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
203         new_j = (int*)(new_a + bs2*new_nz); \
204         new_i = new_j + new_nz; \
205  \
206         /* copy over old data into new slots */ \
207         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
208         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
209         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
210         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
211         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
213         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
214         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
215                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
216         /* free up old matrix storage */ \
217         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
218         if (!b->singlemalloc) { \
219           ierr = PetscFree(b->i);CHKERRQ(ierr); \
220           ierr = PetscFree(b->j);CHKERRQ(ierr); \
221         } \
222         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
223         b->singlemalloc = PETSC_TRUE; \
224  \
225         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
226         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
227         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
228         b->maxnz += bs2*CHUNKSIZE; \
229         b->reallocs++; \
230         b->nz++; \
231       } \
232       N = nrow++ - 1;  \
233       /* shift up all the later entries in this row */ \
234       for (ii=N; ii>=_i; ii--) { \
235         rp[ii+1] = rp[ii]; \
236         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
237       } \
238       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
239       rp[_i]                      = bcol;  \
240       ap[bs2*_i + bs*cidx + ridx] = value;  \
241       b_noinsert:; \
242     bilen[brow] = nrow; \
243 }
244 #endif
245 
246 #if defined(PETSC_USE_MAT_SINGLE)
247 #undef __FUNCT__
248 #define __FUNCT__ "MatSetValues_MPISBAIJ"
249 int MatSetValues_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
250 {
251   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
252   int          ierr,i,N = m*n;
253   MatScalar    *vsingle;
254 
255   PetscFunctionBegin;
256   if (N > b->setvalueslen) {
257     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
258     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
259     b->setvalueslen  = N;
260   }
261   vsingle = b->setvaluescopy;
262 
263   for (i=0; i<N; i++) {
264     vsingle[i] = v[i];
265   }
266   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
267   PetscFunctionReturn(0);
268 }
269 
270 #undef __FUNCT__
271 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
272 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
273 {
274   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
275   int         ierr,i,N = m*n*b->bs2;
276   MatScalar   *vsingle;
277 
278   PetscFunctionBegin;
279   if (N > b->setvalueslen) {
280     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
281     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
282     b->setvalueslen  = N;
283   }
284   vsingle = b->setvaluescopy;
285   for (i=0; i<N; i++) {
286     vsingle[i] = v[i];
287   }
288   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
289   PetscFunctionReturn(0);
290 }
291 
292 #undef __FUNCT__
293 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
294 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
295 {
296   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
297   int         ierr,i,N = m*n;
298   MatScalar   *vsingle;
299 
300   PetscFunctionBegin;
301   SETERRQ(1,"Function not yet written for SBAIJ format");
302   /* PetscFunctionReturn(0); */
303 }
304 
305 #undef __FUNCT__
306 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
307 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,int *im,int n,int *in,PetscScalar *v,InsertMode addv)
308 {
309   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
310   int         ierr,i,N = m*n*b->bs2;
311   MatScalar   *vsingle;
312 
313   PetscFunctionBegin;
314   SETERRQ(1,"Function not yet written for SBAIJ format");
315   /* PetscFunctionReturn(0); */
316 }
317 #endif
318 
319 /* Only add/insert a(i,j) with i<=j (blocks).
320    Any a(i,j) with i>j input by user is ingored.
321 */
322 #undef __FUNCT__
323 #define __FUNCT__ "MatSetValues_MPIBAIJ"
324 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
325 {
326   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
327   MatScalar    value;
328   PetscTruth   roworiented = baij->roworiented;
329   int          ierr,i,j,row,col;
330   int          rstart_orig=baij->rstart_bs;
331   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
332   int          cend_orig=baij->cend_bs,bs=baij->bs;
333 
334   /* Some Variables required in the macro */
335   Mat          A = baij->A;
336   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
337   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
338   MatScalar    *aa=a->a;
339 
340   Mat          B = baij->B;
341   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
342   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
343   MatScalar    *ba=b->a;
344 
345   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
346   int          low,high,t,ridx,cidx,bs2=a->bs2;
347   MatScalar    *ap,*bap;
348 
349   /* for stash */
350   int          n_loc, *in_loc=0;
351   MatScalar    *v_loc=0;
352 
353   PetscFunctionBegin;
354 
355   if(!baij->donotstash){
356     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
357     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
358   }
359 
360   for (i=0; i<m; i++) {
361     if (im[i] < 0) continue;
362 #if defined(PETSC_USE_BOPT_g)
363     if (im[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
364 #endif
365     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
366       row = im[i] - rstart_orig;              /* local row index */
367       for (j=0; j<n; j++) {
368         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
369         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
370           col = in[j] - cstart_orig;          /* local col index */
371           brow = row/bs; bcol = col/bs;
372           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
373           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
374           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
375           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
376         } else if (in[j] < 0) continue;
377 #if defined(PETSC_USE_BOPT_g)
378         else if (in[j] >= mat->N) {SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Col too large");}
379 #endif
380         else {  /* off-diag entry (B) */
381           if (mat->was_assembled) {
382             if (!baij->colmap) {
383               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
384             }
385 #if defined (PETSC_USE_CTABLE)
386             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
387             col  = col - 1;
388 #else
389             col = baij->colmap[in[j]/bs] - 1;
390 #endif
391             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
392               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
393               col =  in[j];
394               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
395               B = baij->B;
396               b = (Mat_SeqBAIJ*)(B)->data;
397               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
398               ba=b->a;
399             } else col += in[j]%bs;
400           } else col = in[j];
401           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
402           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
403           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
404         }
405       }
406     } else {  /* off processor entry */
407       if (!baij->donotstash) {
408         n_loc = 0;
409         for (j=0; j<n; j++){
410           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
411           in_loc[n_loc] = in[j];
412           if (roworiented) {
413             v_loc[n_loc] = v[i*n+j];
414           } else {
415             v_loc[n_loc] = v[j*m+i];
416           }
417           n_loc++;
418         }
419         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
420       }
421     }
422   }
423 
424   if(!baij->donotstash){
425     ierr = PetscFree(in_loc);CHKERRQ(ierr);
426     ierr = PetscFree(v_loc);CHKERRQ(ierr);
427   }
428   PetscFunctionReturn(0);
429 }
430 
431 #undef __FUNCT__
432 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
433 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
434 {
435   PetscFunctionBegin;
436   SETERRQ(1,"Function not yet written for SBAIJ format");
437   /* PetscFunctionReturn(0); */
438 }
439 
440 #define HASH_KEY 0.6180339887
441 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
442 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
443 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
444 #undef __FUNCT__
445 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
446 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
447 {
448   PetscFunctionBegin;
449   SETERRQ(1,"Function not yet written for SBAIJ format");
450   /* PetscFunctionReturn(0); */
451 }
452 
453 #undef __FUNCT__
454 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
455 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,int *im,int n,int *in,MatScalar *v,InsertMode addv)
456 {
457   PetscFunctionBegin;
458   SETERRQ(1,"Function not yet written for SBAIJ format");
459   /* PetscFunctionReturn(0); */
460 }
461 
462 #undef __FUNCT__
463 #define __FUNCT__ "MatGetValues_MPISBAIJ"
464 int MatGetValues_MPISBAIJ(Mat mat,int m,int *idxm,int n,int *idxn,PetscScalar *v)
465 {
466   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
467   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
468   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
469 
470   PetscFunctionBegin;
471   for (i=0; i<m; i++) {
472     if (idxm[i] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative row");
473     if (idxm[i] >= mat->M) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Row too large");
474     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
475       row = idxm[i] - bsrstart;
476       for (j=0; j<n; j++) {
477         if (idxn[j] < 0) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Negative column");
478         if (idxn[j] >= mat->N) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Column too large");
479         if (idxn[j] >= bscstart && idxn[j] < bscend){
480           col = idxn[j] - bscstart;
481           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
482         } else {
483           if (!baij->colmap) {
484             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
485           }
486 #if defined (PETSC_USE_CTABLE)
487           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
488           data --;
489 #else
490           data = baij->colmap[idxn[j]/bs]-1;
491 #endif
492           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
493           else {
494             col  = data + idxn[j]%bs;
495             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
496           }
497         }
498       }
499     } else {
500       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
501     }
502   }
503  PetscFunctionReturn(0);
504 }
505 
506 #undef __FUNCT__
507 #define __FUNCT__ "MatNorm_MPISBAIJ"
508 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
509 {
510   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
511   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
512   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
513   int        ierr;
514   PetscReal  sum[2],*lnorm2;
515 
516   PetscFunctionBegin;
517   if (baij->size == 1) {
518     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
519   } else {
520     if (type == NORM_FROBENIUS) {
521       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
522       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
523       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
524       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
525       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
526       /*
527       ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
528       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
529       */
530       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
531       /*
532       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
533       PetscSynchronizedFlush(PETSC_COMM_WORLD); */
534 
535       *norm = sqrt(sum[0] + 2*sum[1]);
536       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 /*
545   Creates the hash table, and sets the table
546   This table is created only once.
547   If new entried need to be added to the matrix
548   then the hash table has to be destroyed and
549   recreated.
550 */
551 #undef __FUNCT__
552 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
553 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
554 {
555   PetscFunctionBegin;
556   SETERRQ(1,"Function not yet written for SBAIJ format");
557   /* PetscFunctionReturn(0); */
558 }
559 
560 #undef __FUNCT__
561 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
562 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
563 {
564   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
565   int         ierr,nstash,reallocs;
566   InsertMode  addv;
567 
568   PetscFunctionBegin;
569   if (baij->donotstash) {
570     PetscFunctionReturn(0);
571   }
572 
573   /* make sure all processors are either in INSERTMODE or ADDMODE */
574   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
575   if (addv == (ADD_VALUES|INSERT_VALUES)) {
576     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
577   }
578   mat->insertmode = addv; /* in case this processor had no cache */
579 
580   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
581   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
582   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
583   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
584   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
585   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
586   PetscFunctionReturn(0);
587 }
588 
589 #undef __FUNCT__
590 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
591 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
592 {
593   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
594   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
595   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
596   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
597   int         *row,*col,other_disassembled;
598   PetscTruth  r1,r2,r3;
599   MatScalar   *val;
600   InsertMode  addv = mat->insertmode;
601 
602   PetscFunctionBegin;
603 
604   if (!baij->donotstash) {
605     while (1) {
606       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
607       /*
608       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
609       PetscSynchronizedFlush(PETSC_COMM_WORLD);
610       */
611       if (!flg) break;
612 
613       for (i=0; i<n;) {
614         /* Now identify the consecutive vals belonging to the same row */
615         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
616         if (j < n) ncols = j-i;
617         else       ncols = n-i;
618         /* Now assemble all these values with a single function call */
619         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
620         i = j;
621       }
622     }
623     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
624     /* Now process the block-stash. Since the values are stashed column-oriented,
625        set the roworiented flag to column oriented, and after MatSetValues()
626        restore the original flags */
627     r1 = baij->roworiented;
628     r2 = a->roworiented;
629     r3 = b->roworiented;
630     baij->roworiented = PETSC_FALSE;
631     a->roworiented    = PETSC_FALSE;
632     b->roworiented    = PETSC_FALSE;
633     while (1) {
634       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
635       if (!flg) break;
636 
637       for (i=0; i<n;) {
638         /* Now identify the consecutive vals belonging to the same row */
639         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
640         if (j < n) ncols = j-i;
641         else       ncols = n-i;
642         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
643         i = j;
644       }
645     }
646     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
647     baij->roworiented = r1;
648     a->roworiented    = r2;
649     b->roworiented    = r3;
650   }
651 
652   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
653   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
654 
655   /* determine if any processor has disassembled, if so we must
656      also disassemble ourselfs, in order that we may reassemble. */
657   /*
658      if nonzero structure of submatrix B cannot change then we know that
659      no processor disassembled thus we can skip this stuff
660   */
661   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
662     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
663     if (mat->was_assembled && !other_disassembled) {
664       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
665     }
666   }
667 
668   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
669     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
670   }
671   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
672   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
673 
674 #if defined(PETSC_USE_BOPT_g)
675   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
676     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
677     baij->ht_total_ct  = 0;
678     baij->ht_insert_ct = 0;
679   }
680 #endif
681   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
682     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
683     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
684     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
685   }
686 
687   if (baij->rowvalues) {
688     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
689     baij->rowvalues = 0;
690   }
691   PetscFunctionReturn(0);
692 }
693 
694 #undef __FUNCT__
695 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
696 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
697 {
698   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
699   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
700   PetscTruth        isascii,isdraw;
701   PetscViewer       sviewer;
702   PetscViewerFormat format;
703 
704   PetscFunctionBegin;
705   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
706   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
707   if (isascii) {
708     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
709     if (format == PETSC_VIEWER_ASCII_INFO_LONG) {
710       MatInfo info;
711       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
712       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
713       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
714               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
715               baij->bs,(int)info.memory);CHKERRQ(ierr);
716       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
717       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
718       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
719       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
720       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
721       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
722       PetscFunctionReturn(0);
723     } else if (format == PETSC_VIEWER_ASCII_INFO) {
724       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
725       PetscFunctionReturn(0);
726     }
727   }
728 
729   if (isdraw) {
730     PetscDraw       draw;
731     PetscTruth isnull;
732     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
733     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
734   }
735 
736   if (size == 1) {
737     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
738     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
739   } else {
740     /* assemble the entire matrix onto first processor. */
741     Mat         A;
742     Mat_SeqSBAIJ *Aloc;
743     Mat_SeqBAIJ *Bloc;
744     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
745     MatScalar   *a;
746 
747     if (!rank) {
748       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
749     } else {
750       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
751     }
752     PetscLogObjectParent(mat,A);
753 
754     /* copy over the A part */
755     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
756     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
757     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
758 
759     for (i=0; i<mbs; i++) {
760       rvals[0] = bs*(baij->rstart + i);
761       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
762       for (j=ai[i]; j<ai[i+1]; j++) {
763         col = (baij->cstart+aj[j])*bs;
764         for (k=0; k<bs; k++) {
765           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
766           col++; a += bs;
767         }
768       }
769     }
770     /* copy over the B part */
771     Bloc = (Mat_SeqBAIJ*)baij->B->data;
772     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
773     for (i=0; i<mbs; i++) {
774       rvals[0] = bs*(baij->rstart + i);
775       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
776       for (j=ai[i]; j<ai[i+1]; j++) {
777         col = baij->garray[aj[j]]*bs;
778         for (k=0; k<bs; k++) {
779           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
780           col++; a += bs;
781         }
782       }
783     }
784     ierr = PetscFree(rvals);CHKERRQ(ierr);
785     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
786     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
787     /*
788        Everyone has to call to draw the matrix since the graphics waits are
789        synchronized across all processors that share the PetscDraw object
790     */
791     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
792     if (!rank) {
793       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
794       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
795     }
796     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
797     ierr = MatDestroy(A);CHKERRQ(ierr);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatView_MPISBAIJ"
804 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
805 {
806   int        ierr;
807   PetscTruth isascii,isdraw,issocket,isbinary;
808 
809   PetscFunctionBegin;
810   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
811   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
812   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
814   if (isascii || isdraw || issocket || isbinary) {
815     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
816   } else {
817     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
818   }
819   PetscFunctionReturn(0);
820 }
821 
822 #undef __FUNCT__
823 #define __FUNCT__ "MatDestroy_MPISBAIJ"
824 int MatDestroy_MPISBAIJ(Mat mat)
825 {
826   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
827   int         ierr;
828 
829   PetscFunctionBegin;
830 #if defined(PETSC_USE_LOG)
831   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
832 #endif
833   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
834   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
835   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
836   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
837   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
838 #if defined (PETSC_USE_CTABLE)
839   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
840 #else
841   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
842 #endif
843   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
844   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
845   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
846   if (baij->slvec0) {
847     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
848     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
849   }
850   if (baij->slvec1) {
851     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
852     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
853     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
854   }
855   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
856   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
857   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
858   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
859 #if defined(PETSC_USE_MAT_SINGLE)
860   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
861 #endif
862   ierr = PetscFree(baij);CHKERRQ(ierr);
863   PetscFunctionReturn(0);
864 }
865 
866 #undef __FUNCT__
867 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
868 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
869 {
870   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
871   int         ierr,nt;
872 
873   PetscFunctionBegin;
874   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
875   if (nt != A->n) {
876     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
877   }
878   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
879   if (nt != A->m) {
880     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
881   }
882 
883   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
884   /* do diagonal part */
885   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
886   /* do supperdiagonal part */
887   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
888   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
889   /* do subdiagonal part */
890   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
891   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
892   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
893 
894   PetscFunctionReturn(0);
895 }
896 
897 #undef __FUNCT__
898 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
899 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
900 {
901   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
902   int        ierr;
903 
904   PetscFunctionBegin;
905   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
906   /* do diagonal part */
907   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
908   /* do supperdiagonal part */
909   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
910   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
911 
912   /* do subdiagonal part */
913   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
914   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
915   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
916 
917   PetscFunctionReturn(0);
918 }
919 
920 #undef __FUNCT__
921 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
922 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
923 {
924   PetscFunctionBegin;
925   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
926   /* PetscFunctionReturn(0); */
927 }
928 
929 #undef __FUNCT__
930 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
931 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
932 {
933   PetscFunctionBegin;
934   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
935   /* PetscFunctionReturn(0); */
936 }
937 
938 /*
939   This only works correctly for square matrices where the subblock A->A is the
940    diagonal block
941 */
942 #undef __FUNCT__
943 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
944 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
945 {
946   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
947   int         ierr;
948 
949   PetscFunctionBegin;
950   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
951   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
952   PetscFunctionReturn(0);
953 }
954 
955 #undef __FUNCT__
956 #define __FUNCT__ "MatScale_MPISBAIJ"
957 int MatScale_MPISBAIJ(PetscScalar *aa,Mat A)
958 {
959   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
960   int         ierr;
961 
962   PetscFunctionBegin;
963   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
964   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
965   PetscFunctionReturn(0);
966 }
967 
968 #undef __FUNCT__
969 #define __FUNCT__ "MatGetRow_MPISBAIJ"
970 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
971 {
972   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
973   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
974   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
975   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
976   int            *cmap,*idx_p,cstart = mat->cstart;
977 
978   PetscFunctionBegin;
979   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
980   mat->getrowactive = PETSC_TRUE;
981 
982   if (!mat->rowvalues && (idx || v)) {
983     /*
984         allocate enough space to hold information from the longest row.
985     */
986     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
987     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
988     int     max = 1,mbs = mat->mbs,tmp;
989     for (i=0; i<mbs; i++) {
990       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
991       if (max < tmp) { max = tmp; }
992     }
993     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
994     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
995   }
996 
997   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
998   lrow = row - brstart;  /* local row index */
999 
1000   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1001   if (!v)   {pvA = 0; pvB = 0;}
1002   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1003   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1004   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1005   nztot = nzA + nzB;
1006 
1007   cmap  = mat->garray;
1008   if (v  || idx) {
1009     if (nztot) {
1010       /* Sort by increasing column numbers, assuming A and B already sorted */
1011       int imark = -1;
1012       if (v) {
1013         *v = v_p = mat->rowvalues;
1014         for (i=0; i<nzB; i++) {
1015           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1016           else break;
1017         }
1018         imark = i;
1019         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1020         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1021       }
1022       if (idx) {
1023         *idx = idx_p = mat->rowindices;
1024         if (imark > -1) {
1025           for (i=0; i<imark; i++) {
1026             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1027           }
1028         } else {
1029           for (i=0; i<nzB; i++) {
1030             if (cmap[cworkB[i]/bs] < cstart)
1031               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1032             else break;
1033           }
1034           imark = i;
1035         }
1036         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1037         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1038       }
1039     } else {
1040       if (idx) *idx = 0;
1041       if (v)   *v   = 0;
1042     }
1043   }
1044   *nz = nztot;
1045   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1046   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1047   PetscFunctionReturn(0);
1048 }
1049 
1050 #undef __FUNCT__
1051 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1052 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1053 {
1054   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1055 
1056   PetscFunctionBegin;
1057   if (baij->getrowactive == PETSC_FALSE) {
1058     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1059   }
1060   baij->getrowactive = PETSC_FALSE;
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1066 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1067 {
1068   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1069 
1070   PetscFunctionBegin;
1071   *bs = baij->bs;
1072   PetscFunctionReturn(0);
1073 }
1074 
1075 #undef __FUNCT__
1076 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1077 int MatZeroEntries_MPISBAIJ(Mat A)
1078 {
1079   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1080   int         ierr;
1081 
1082   PetscFunctionBegin;
1083   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1084   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1090 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1091 {
1092   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1093   Mat         A = a->A,B = a->B;
1094   int         ierr;
1095   PetscReal   isend[5],irecv[5];
1096 
1097   PetscFunctionBegin;
1098   info->block_size     = (PetscReal)a->bs;
1099   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1100   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1101   isend[3] = info->memory;  isend[4] = info->mallocs;
1102   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1103   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1104   isend[3] += info->memory;  isend[4] += info->mallocs;
1105   if (flag == MAT_LOCAL) {
1106     info->nz_used      = isend[0];
1107     info->nz_allocated = isend[1];
1108     info->nz_unneeded  = isend[2];
1109     info->memory       = isend[3];
1110     info->mallocs      = isend[4];
1111   } else if (flag == MAT_GLOBAL_MAX) {
1112     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1113     info->nz_used      = irecv[0];
1114     info->nz_allocated = irecv[1];
1115     info->nz_unneeded  = irecv[2];
1116     info->memory       = irecv[3];
1117     info->mallocs      = irecv[4];
1118   } else if (flag == MAT_GLOBAL_SUM) {
1119     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1120     info->nz_used      = irecv[0];
1121     info->nz_allocated = irecv[1];
1122     info->nz_unneeded  = irecv[2];
1123     info->memory       = irecv[3];
1124     info->mallocs      = irecv[4];
1125   } else {
1126     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1127   }
1128   info->rows_global       = (PetscReal)A->M;
1129   info->columns_global    = (PetscReal)A->N;
1130   info->rows_local        = (PetscReal)A->m;
1131   info->columns_local     = (PetscReal)A->N;
1132   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1133   info->fill_ratio_needed = 0;
1134   info->factor_mallocs    = 0;
1135   PetscFunctionReturn(0);
1136 }
1137 
1138 #undef __FUNCT__
1139 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1140 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1141 {
1142   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1143   int         ierr;
1144 
1145   PetscFunctionBegin;
1146   switch (op) {
1147   case MAT_NO_NEW_NONZERO_LOCATIONS:
1148   case MAT_YES_NEW_NONZERO_LOCATIONS:
1149   case MAT_COLUMNS_UNSORTED:
1150   case MAT_COLUMNS_SORTED:
1151   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1152   case MAT_KEEP_ZEROED_ROWS:
1153   case MAT_NEW_NONZERO_LOCATION_ERR:
1154     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1155     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1156     break;
1157   case MAT_ROW_ORIENTED:
1158     a->roworiented = PETSC_TRUE;
1159     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1160     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1161     break;
1162   case MAT_ROWS_SORTED:
1163   case MAT_ROWS_UNSORTED:
1164   case MAT_YES_NEW_DIAGONALS:
1165   case MAT_USE_SINGLE_PRECISION_SOLVES:
1166     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1167     break;
1168   case MAT_COLUMN_ORIENTED:
1169     a->roworiented = PETSC_FALSE;
1170     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1171     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1172     break;
1173   case MAT_IGNORE_OFF_PROC_ENTRIES:
1174     a->donotstash = PETSC_TRUE;
1175     break;
1176   case MAT_NO_NEW_DIAGONALS:
1177     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1178   case MAT_USE_HASH_TABLE:
1179     a->ht_flag = PETSC_TRUE;
1180     break;
1181   default:
1182     SETERRQ(PETSC_ERR_SUP,"unknown option");
1183   }
1184   PetscFunctionReturn(0);
1185 }
1186 
1187 #undef __FUNCT__
1188 #define __FUNCT__ "MatTranspose_MPISBAIJ("
1189 int MatTranspose_MPISBAIJ(Mat A,Mat *matout)
1190 {
1191   PetscFunctionBegin;
1192   SETERRQ(1,"Matrix is symmetric. MatTranspose() should not be called");
1193   /* PetscFunctionReturn(0); */
1194 }
1195 
1196 #undef __FUNCT__
1197 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1198 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1199 {
1200   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1201   Mat         a = baij->A,b = baij->B;
1202   int         ierr,s1,s2,s3;
1203 
1204   PetscFunctionBegin;
1205   if (ll != rr) {
1206     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1207   }
1208   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1209   if (rr) {
1210     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1211     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1212     /* Overlap communication with computation. */
1213     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1214     /*} if (ll) { */
1215     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1216     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1217     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1218     /* } */
1219   /* scale  the diagonal block */
1220   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1221 
1222   /* if (rr) { */
1223     /* Do a scatter end and then right scale the off-diagonal block */
1224     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1225     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1226   }
1227 
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1233 int MatZeroRows_MPISBAIJ(Mat A,IS is,PetscScalar *diag)
1234 {
1235   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1236   int            i,ierr,N,*rows,*owners = l->rowners,size = l->size;
1237   int            *procs,*nprocs,j,idx,nsends,*work,row;
1238   int            nmax,*svalues,*starts,*owner,nrecvs,rank = l->rank;
1239   int            *rvalues,tag = A->tag,count,base,slen,n,*source;
1240   int            *lens,imdex,*lrows,*values,bs=l->bs,rstart_bs=l->rstart_bs;
1241   MPI_Comm       comm = A->comm;
1242   MPI_Request    *send_waits,*recv_waits;
1243   MPI_Status     recv_status,*send_status;
1244   IS             istmp;
1245   PetscTruth     found;
1246 
1247   PetscFunctionBegin;
1248   ierr = ISGetSize(is,&N);CHKERRQ(ierr);
1249   ierr = ISGetIndices(is,&rows);CHKERRQ(ierr);
1250 
1251   /*  first count number of contributors to each processor */
1252   ierr  = PetscMalloc(2*size*sizeof(int),&nprocs);CHKERRQ(ierr);
1253   ierr  = PetscMemzero(nprocs,2*size*sizeof(int));CHKERRQ(ierr);
1254   procs = nprocs + size;
1255   ierr  = PetscMalloc((N+1)*sizeof(int),&owner);CHKERRQ(ierr); /* see note*/
1256   for (i=0; i<N; i++) {
1257     idx   = rows[i];
1258     found = PETSC_FALSE;
1259     for (j=0; j<size; j++) {
1260       if (idx >= owners[j]*bs && idx < owners[j+1]*bs) {
1261         nprocs[j]++; procs[j] = 1; owner[i] = j; found = PETSC_TRUE; break;
1262       }
1263     }
1264     if (!found) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Index out of range");
1265   }
1266   nsends = 0;  for (i=0; i<size; i++) { nsends += procs[i];}
1267 
1268   /* inform other processors of number of messages and max length*/
1269   ierr   = PetscMalloc(2*size*sizeof(int),&work);CHKERRQ(ierr);
1270   ierr   = MPI_Allreduce(nprocs,work,2*size,MPI_INT,PetscMaxSum_Op,comm);CHKERRQ(ierr);
1271   nmax   = work[rank];
1272   nrecvs = work[size+rank];
1273   ierr   = PetscFree(work);CHKERRQ(ierr);
1274 
1275   /* post receives:   */
1276   ierr = PetscMalloc((nrecvs+1)*(nmax+1)*sizeof(int),&rvalues);CHKERRQ(ierr);
1277   ierr = PetscMalloc((nrecvs+1)*sizeof(MPI_Request),&recv_waits);CHKERRQ(ierr);
1278   for (i=0; i<nrecvs; i++) {
1279     ierr = MPI_Irecv(rvalues+nmax*i,nmax,MPI_INT,MPI_ANY_SOURCE,tag,comm,recv_waits+i);CHKERRQ(ierr);
1280   }
1281 
1282   /* do sends:
1283      1) starts[i] gives the starting index in svalues for stuff going to
1284      the ith processor
1285   */
1286   ierr = PetscMalloc((N+1)*sizeof(int),&svalues);CHKERRQ(ierr);
1287   ierr = PetscMalloc((nsends+1)*sizeof(MPI_Request),&send_waits);CHKERRQ(ierr);
1288   ierr = PetscMalloc((size+1)*sizeof(int),&starts);CHKERRQ(ierr);
1289   starts[0]  = 0;
1290   for (i=1; i<size; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1291   for (i=0; i<N; i++) {
1292     svalues[starts[owner[i]]++] = rows[i];
1293   }
1294   ierr = ISRestoreIndices(is,&rows);CHKERRQ(ierr);
1295 
1296   starts[0] = 0;
1297   for (i=1; i<size+1; i++) { starts[i] = starts[i-1] + nprocs[i-1];}
1298   count = 0;
1299   for (i=0; i<size; i++) {
1300     if (procs[i]) {
1301       ierr = MPI_Isend(svalues+starts[i],nprocs[i],MPI_INT,i,tag,comm,send_waits+count++);CHKERRQ(ierr);
1302     }
1303   }
1304   ierr = PetscFree(starts);CHKERRQ(ierr);
1305 
1306   base = owners[rank]*bs;
1307 
1308   /*  wait on receives */
1309   ierr   = PetscMalloc(2*(nrecvs+1)*sizeof(int),&lens);CHKERRQ(ierr);
1310   source = lens + nrecvs;
1311   count  = nrecvs; slen = 0;
1312   while (count) {
1313     ierr = MPI_Waitany(nrecvs,recv_waits,&imdex,&recv_status);CHKERRQ(ierr);
1314     /* unpack receives into our local space */
1315     ierr = MPI_Get_count(&recv_status,MPI_INT,&n);CHKERRQ(ierr);
1316     source[imdex]  = recv_status.MPI_SOURCE;
1317     lens[imdex]    = n;
1318     slen          += n;
1319     count--;
1320   }
1321   ierr = PetscFree(recv_waits);CHKERRQ(ierr);
1322 
1323   /* move the data into the send scatter */
1324   ierr = PetscMalloc((slen+1)*sizeof(int),&lrows);CHKERRQ(ierr);
1325   count = 0;
1326   for (i=0; i<nrecvs; i++) {
1327     values = rvalues + i*nmax;
1328     for (j=0; j<lens[i]; j++) {
1329       lrows[count++] = values[j] - base;
1330     }
1331   }
1332   ierr = PetscFree(rvalues);CHKERRQ(ierr);
1333   ierr = PetscFree(lens);CHKERRQ(ierr);
1334   ierr = PetscFree(owner);CHKERRQ(ierr);
1335   ierr = PetscFree(nprocs);CHKERRQ(ierr);
1336 
1337   /* actually zap the local rows */
1338   ierr = ISCreateGeneral(PETSC_COMM_SELF,slen,lrows,&istmp);CHKERRQ(ierr);
1339   PetscLogObjectParent(A,istmp);
1340 
1341   /*
1342         Zero the required rows. If the "diagonal block" of the matrix
1343      is square and the user wishes to set the diagonal we use seperate
1344      code so that MatSetValues() is not called for each diagonal allocating
1345      new memory, thus calling lots of mallocs and slowing things down.
1346 
1347        Contributed by: Mathew Knepley
1348   */
1349   /* must zero l->B before l->A because the (diag) case below may put values into l->B*/
1350   ierr = MatZeroRows_SeqBAIJ(l->B,istmp,0);CHKERRQ(ierr);
1351   if (diag && (l->A->M == l->A->N)) {
1352     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,diag);CHKERRQ(ierr);
1353   } else if (diag) {
1354     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1355     if (((Mat_SeqSBAIJ*)l->A->data)->nonew) {
1356       SETERRQ(PETSC_ERR_SUP,"MatZeroRows() on rectangular matrices cannot be used with the Mat options \n\
1357 MAT_NO_NEW_NONZERO_LOCATIONS,MAT_NEW_NONZERO_LOCATION_ERR,MAT_NEW_NONZERO_ALLOCATION_ERR");
1358     }
1359     for (i=0; i<slen; i++) {
1360       row  = lrows[i] + rstart_bs;
1361       ierr = MatSetValues(A,1,&row,1,&row,diag,INSERT_VALUES);CHKERRQ(ierr);
1362     }
1363     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1364     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
1365   } else {
1366     ierr = MatZeroRows_SeqSBAIJ(l->A,istmp,0);CHKERRQ(ierr);
1367   }
1368 
1369   ierr = ISDestroy(istmp);CHKERRQ(ierr);
1370   ierr = PetscFree(lrows);CHKERRQ(ierr);
1371 
1372   /* wait on sends */
1373   if (nsends) {
1374     ierr = PetscMalloc(nsends*sizeof(MPI_Status),&send_status);CHKERRQ(ierr);
1375     ierr        = MPI_Waitall(nsends,send_waits,send_status);CHKERRQ(ierr);
1376     ierr        = PetscFree(send_status);CHKERRQ(ierr);
1377   }
1378   ierr = PetscFree(send_waits);CHKERRQ(ierr);
1379   ierr = PetscFree(svalues);CHKERRQ(ierr);
1380 
1381   PetscFunctionReturn(0);
1382 }
1383 
1384 #undef __FUNCT__
1385 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1386 int MatPrintHelp_MPISBAIJ(Mat A)
1387 {
1388   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1389   MPI_Comm    comm = A->comm;
1390   static int  called = 0;
1391   int         ierr;
1392 
1393   PetscFunctionBegin;
1394   if (!a->rank) {
1395     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1396   }
1397   if (called) {PetscFunctionReturn(0);} else called = 1;
1398   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1399   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1405 int MatSetUnfactored_MPISBAIJ(Mat A)
1406 {
1407   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1408   int         ierr;
1409 
1410   PetscFunctionBegin;
1411   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1416 
1417 #undef __FUNCT__
1418 #define __FUNCT__ "MatEqual_MPISBAIJ"
1419 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1420 {
1421   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1422   Mat         a,b,c,d;
1423   PetscTruth  flg;
1424   int         ierr;
1425 
1426   PetscFunctionBegin;
1427   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg);CHKERRQ(ierr);
1428   if (!flg) SETERRQ(PETSC_ERR_ARG_INCOMP,"Matrices must be same type");
1429   a = matA->A; b = matA->B;
1430   c = matB->A; d = matB->B;
1431 
1432   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1433   if (flg == PETSC_TRUE) {
1434     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1435   }
1436   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1437   PetscFunctionReturn(0);
1438 }
1439 
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1442 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1443 {
1444   int        ierr;
1445 
1446   PetscFunctionBegin;
1447   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1448   PetscFunctionReturn(0);
1449 }
1450 /* -------------------------------------------------------------------*/
1451 static struct _MatOps MatOps_Values = {
1452   MatSetValues_MPISBAIJ,
1453   MatGetRow_MPISBAIJ,
1454   MatRestoreRow_MPISBAIJ,
1455   MatMult_MPISBAIJ_2comm,
1456   MatMultAdd_MPISBAIJ,
1457   MatMultTranspose_MPISBAIJ,
1458   MatMultTransposeAdd_MPISBAIJ,
1459   0,
1460   0,
1461   0,
1462   0,
1463   0,
1464   0,
1465   MatRelax_MPISBAIJ,
1466   MatTranspose_MPISBAIJ,
1467   MatGetInfo_MPISBAIJ,
1468   MatEqual_MPISBAIJ,
1469   MatGetDiagonal_MPISBAIJ,
1470   MatDiagonalScale_MPISBAIJ,
1471   MatNorm_MPISBAIJ,
1472   MatAssemblyBegin_MPISBAIJ,
1473   MatAssemblyEnd_MPISBAIJ,
1474   0,
1475   MatSetOption_MPISBAIJ,
1476   MatZeroEntries_MPISBAIJ,
1477   MatZeroRows_MPISBAIJ,
1478   0,
1479   0,
1480   0,
1481   0,
1482   MatSetUpPreallocation_MPISBAIJ,
1483   0,
1484   0,
1485   0,
1486   0,
1487   MatDuplicate_MPISBAIJ,
1488   0,
1489   0,
1490   0,
1491   0,
1492   0,
1493   MatGetSubMatrices_MPISBAIJ,
1494   MatIncreaseOverlap_MPISBAIJ,
1495   MatGetValues_MPISBAIJ,
1496   0,
1497   MatPrintHelp_MPISBAIJ,
1498   MatScale_MPISBAIJ,
1499   0,
1500   0,
1501   0,
1502   MatGetBlockSize_MPISBAIJ,
1503   0,
1504   0,
1505   0,
1506   0,
1507   0,
1508   0,
1509   MatSetUnfactored_MPISBAIJ,
1510   0,
1511   MatSetValuesBlocked_MPISBAIJ,
1512   0,
1513   0,
1514   0,
1515   MatGetPetscMaps_Petsc,
1516   0,
1517   0,
1518   0,
1519   0,
1520   0,
1521   0,
1522   MatGetRowMax_MPISBAIJ};
1523 
1524 
1525 EXTERN_C_BEGIN
1526 #undef __FUNCT__
1527 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1528 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1529 {
1530   PetscFunctionBegin;
1531   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1532   *iscopy = PETSC_FALSE;
1533   PetscFunctionReturn(0);
1534 }
1535 EXTERN_C_END
1536 
1537 EXTERN_C_BEGIN
1538 #undef __FUNCT__
1539 #define __FUNCT__ "MatCreate_MPISBAIJ"
1540 int MatCreate_MPISBAIJ(Mat B)
1541 {
1542   Mat_MPISBAIJ *b;
1543   int          ierr;
1544   PetscTruth   flg;
1545 
1546   PetscFunctionBegin;
1547 
1548   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1549   B->data = (void*)b;
1550   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1551   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1552 
1553   B->ops->destroy    = MatDestroy_MPISBAIJ;
1554   B->ops->view       = MatView_MPISBAIJ;
1555   B->mapping    = 0;
1556   B->factor     = 0;
1557   B->assembled  = PETSC_FALSE;
1558 
1559   B->insertmode = NOT_SET_VALUES;
1560   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1561   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1562 
1563   /* build local table of row and column ownerships */
1564   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1565   b->cowners    = b->rowners + b->size + 2;
1566   b->rowners_bs = b->cowners + b->size + 2;
1567   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1568 
1569   /* build cache for off array entries formed */
1570   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1571   b->donotstash  = PETSC_FALSE;
1572   b->colmap      = PETSC_NULL;
1573   b->garray      = PETSC_NULL;
1574   b->roworiented = PETSC_TRUE;
1575 
1576 #if defined(PETSC_USE_MAT_SINGLE)
1577   /* stuff for MatSetValues_XXX in single precision */
1578   b->setvalueslen     = 0;
1579   b->setvaluescopy    = PETSC_NULL;
1580 #endif
1581 
1582   /* stuff used in block assembly */
1583   b->barray       = 0;
1584 
1585   /* stuff used for matrix vector multiply */
1586   b->lvec         = 0;
1587   b->Mvctx        = 0;
1588   b->slvec0       = 0;
1589   b->slvec0b      = 0;
1590   b->slvec1       = 0;
1591   b->slvec1a      = 0;
1592   b->slvec1b      = 0;
1593   b->sMvctx       = 0;
1594 
1595   /* stuff for MatGetRow() */
1596   b->rowindices   = 0;
1597   b->rowvalues    = 0;
1598   b->getrowactive = PETSC_FALSE;
1599 
1600   /* hash table stuff */
1601   b->ht           = 0;
1602   b->hd           = 0;
1603   b->ht_size      = 0;
1604   b->ht_flag      = PETSC_FALSE;
1605   b->ht_fact      = 0;
1606   b->ht_total_ct  = 0;
1607   b->ht_insert_ct = 0;
1608 
1609   ierr = PetscOptionsHasName(PETSC_NULL,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1610   if (flg) {
1611     PetscReal fact = 1.39;
1612     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1613     ierr = PetscOptionsGetReal(PETSC_NULL,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1614     if (fact <= 1.0) fact = 1.39;
1615     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1616     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1617   }
1618   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1619                                      "MatStoreValues_MPISBAIJ",
1620                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1622                                      "MatRetrieveValues_MPISBAIJ",
1623                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1625                                      "MatGetDiagonalBlock_MPISBAIJ",
1626                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1627   PetscFunctionReturn(0);
1628 }
1629 EXTERN_C_END
1630 
1631 #undef __FUNCT__
1632 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1633 /*@C
1634    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1635    the user should preallocate the matrix storage by setting the parameters
1636    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1637    performance can be increased by more than a factor of 50.
1638 
1639    Collective on Mat
1640 
1641    Input Parameters:
1642 +  A - the matrix
1643 .  bs   - size of blockk
1644 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1645            submatrix  (same for all local rows)
1646 .  d_nnz - array containing the number of block nonzeros in the various block rows
1647            of the in diagonal portion of the local (possibly different for each block
1648            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1649 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1650            submatrix (same for all local rows).
1651 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1652            off-diagonal portion of the local submatrix (possibly different for
1653            each block row) or PETSC_NULL.
1654 
1655 
1656    Options Database Keys:
1657 .   -mat_no_unroll - uses code that does not unroll the loops in the
1658                      block calculations (much slower)
1659 .   -mat_block_size - size of the blocks to use
1660 
1661    Notes:
1662 
1663    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1664    than it must be used on all processors that share the object for that argument.
1665 
1666    Storage Information:
1667    For a square global matrix we define each processor's diagonal portion
1668    to be its local rows and the corresponding columns (a square submatrix);
1669    each processor's off-diagonal portion encompasses the remainder of the
1670    local matrix (a rectangular submatrix).
1671 
1672    The user can specify preallocated storage for the diagonal part of
1673    the local submatrix with either d_nz or d_nnz (not both).  Set
1674    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1675    memory allocation.  Likewise, specify preallocated storage for the
1676    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1677 
1678    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1679    the figure below we depict these three local rows and all columns (0-11).
1680 
1681 .vb
1682            0 1 2 3 4 5 6 7 8 9 10 11
1683           -------------------
1684    row 3  |  o o o d d d o o o o o o
1685    row 4  |  o o o d d d o o o o o o
1686    row 5  |  o o o d d d o o o o o o
1687           -------------------
1688 .ve
1689 
1690    Thus, any entries in the d locations are stored in the d (diagonal)
1691    submatrix, and any entries in the o locations are stored in the
1692    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1693    stored simply in the MATSEQBAIJ format for compressed row storage.
1694 
1695    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1696    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1697    In general, for PDE problems in which most nonzeros are near the diagonal,
1698    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1699    or you will get TERRIBLE performance; see the users' manual chapter on
1700    matrices.
1701 
1702    Level: intermediate
1703 
1704 .keywords: matrix, block, aij, compressed row, sparse, parallel
1705 
1706 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1707 @*/
1708 
1709 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1710 {
1711   Mat_MPISBAIJ *b;
1712   int          ierr,i,mbs,Mbs;
1713   PetscTruth   flg2;
1714 
1715   PetscFunctionBegin;
1716   ierr = PetscTypeCompare((PetscObject)B,MATMPISBAIJ,&flg2);CHKERRQ(ierr);
1717   if (!flg2) PetscFunctionReturn(0);
1718 
1719   ierr = PetscOptionsGetInt(PETSC_NULL,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1720 
1721   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1722   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1723   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1724   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1725   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1726   if (d_nnz) {
1727     for (i=0; i<B->m/bs; i++) {
1728       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1729     }
1730   }
1731   if (o_nnz) {
1732     for (i=0; i<B->m/bs; i++) {
1733       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1734     }
1735   }
1736   B->preallocated = PETSC_TRUE;
1737   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1738   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1739   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1740   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1741 
1742   b   = (Mat_MPISBAIJ*)B->data;
1743   mbs = B->m/bs;
1744   Mbs = B->M/bs;
1745   if (mbs*bs != B->m) {
1746     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1747   }
1748 
1749   b->bs  = bs;
1750   b->bs2 = bs*bs;
1751   b->mbs = mbs;
1752   b->nbs = mbs;
1753   b->Mbs = Mbs;
1754   b->Nbs = Mbs;
1755 
1756   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1757   b->rowners[0]    = 0;
1758   for (i=2; i<=b->size; i++) {
1759     b->rowners[i] += b->rowners[i-1];
1760   }
1761   b->rstart    = b->rowners[b->rank];
1762   b->rend      = b->rowners[b->rank+1];
1763   b->cstart    = b->rstart;
1764   b->cend      = b->rend;
1765   for (i=0; i<=b->size; i++) {
1766     b->rowners_bs[i] = b->rowners[i]*bs;
1767   }
1768   b->rstart_bs = b-> rstart*bs;
1769   b->rend_bs   = b->rend*bs;
1770 
1771   b->cstart_bs = b->cstart*bs;
1772   b->cend_bs   = b->cend*bs;
1773 
1774 
1775   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1776   PetscLogObjectParent(B,b->A);
1777   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1778   PetscLogObjectParent(B,b->B);
1779 
1780   /* build cache for off array entries formed */
1781   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1782 
1783   PetscFunctionReturn(0);
1784 }
1785 
1786 #undef __FUNCT__
1787 #define __FUNCT__ "MatCreateMPISBAIJ"
1788 /*@C
1789    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1790    (block compressed row).  For good matrix assembly performance
1791    the user should preallocate the matrix storage by setting the parameters
1792    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1793    performance can be increased by more than a factor of 50.
1794 
1795    Collective on MPI_Comm
1796 
1797    Input Parameters:
1798 +  comm - MPI communicator
1799 .  bs   - size of blockk
1800 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1801            This value should be the same as the local size used in creating the
1802            y vector for the matrix-vector product y = Ax.
1803 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1804            This value should be the same as the local size used in creating the
1805            x vector for the matrix-vector product y = Ax.
1806 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1807 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1808 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1809            submatrix  (same for all local rows)
1810 .  d_nnz - array containing the number of block nonzeros in the various block rows
1811            of the in diagonal portion of the local (possibly different for each block
1812            row) or PETSC_NULL.  You must leave room for the diagonal entry even if it is zero.
1813 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1814            submatrix (same for all local rows).
1815 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1816            off-diagonal portion of the local submatrix (possibly different for
1817            each block row) or PETSC_NULL.
1818 
1819    Output Parameter:
1820 .  A - the matrix
1821 
1822    Options Database Keys:
1823 .   -mat_no_unroll - uses code that does not unroll the loops in the
1824                      block calculations (much slower)
1825 .   -mat_block_size - size of the blocks to use
1826 .   -mat_mpi - use the parallel matrix data structures even on one processor
1827                (defaults to using SeqBAIJ format on one processor)
1828 
1829    Notes:
1830    The user MUST specify either the local or global matrix dimensions
1831    (possibly both).
1832 
1833    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1834    than it must be used on all processors that share the object for that argument.
1835 
1836    Storage Information:
1837    For a square global matrix we define each processor's diagonal portion
1838    to be its local rows and the corresponding columns (a square submatrix);
1839    each processor's off-diagonal portion encompasses the remainder of the
1840    local matrix (a rectangular submatrix).
1841 
1842    The user can specify preallocated storage for the diagonal part of
1843    the local submatrix with either d_nz or d_nnz (not both).  Set
1844    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1845    memory allocation.  Likewise, specify preallocated storage for the
1846    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1847 
1848    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1849    the figure below we depict these three local rows and all columns (0-11).
1850 
1851 .vb
1852            0 1 2 3 4 5 6 7 8 9 10 11
1853           -------------------
1854    row 3  |  o o o d d d o o o o o o
1855    row 4  |  o o o d d d o o o o o o
1856    row 5  |  o o o d d d o o o o o o
1857           -------------------
1858 .ve
1859 
1860    Thus, any entries in the d locations are stored in the d (diagonal)
1861    submatrix, and any entries in the o locations are stored in the
1862    o (off-diagonal) submatrix.  Note that the d and the o submatrices are
1863    stored simply in the MATSEQBAIJ format for compressed row storage.
1864 
1865    Now d_nz should indicate the number of block nonzeros per row in the d matrix,
1866    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1867    In general, for PDE problems in which most nonzeros are near the diagonal,
1868    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1869    or you will get TERRIBLE performance; see the users' manual chapter on
1870    matrices.
1871 
1872    Level: intermediate
1873 
1874 .keywords: matrix, block, aij, compressed row, sparse, parallel
1875 
1876 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1877 @*/
1878 
1879 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,int *d_nnz,int o_nz,int *o_nnz,Mat *A)
1880 {
1881   int ierr,size;
1882 
1883   PetscFunctionBegin;
1884   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1885   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1886   if (size > 1) {
1887     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1888     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1889   } else {
1890     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1891     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1892   }
1893   PetscFunctionReturn(0);
1894 }
1895 
1896 
1897 #undef __FUNCT__
1898 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1899 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1900 {
1901   Mat          mat;
1902   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1903   int          ierr,len=0;
1904 
1905   PetscFunctionBegin;
1906   *newmat       = 0;
1907   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1908   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
1909   mat->preallocated = PETSC_TRUE;
1910   a = (Mat_MPISBAIJ*)mat->data;
1911   a->bs  = oldmat->bs;
1912   a->bs2 = oldmat->bs2;
1913   a->mbs = oldmat->mbs;
1914   a->nbs = oldmat->nbs;
1915   a->Mbs = oldmat->Mbs;
1916   a->Nbs = oldmat->Nbs;
1917 
1918   a->rstart       = oldmat->rstart;
1919   a->rend         = oldmat->rend;
1920   a->cstart       = oldmat->cstart;
1921   a->cend         = oldmat->cend;
1922   a->size         = oldmat->size;
1923   a->rank         = oldmat->rank;
1924   a->donotstash   = oldmat->donotstash;
1925   a->roworiented  = oldmat->roworiented;
1926   a->rowindices   = 0;
1927   a->rowvalues    = 0;
1928   a->getrowactive = PETSC_FALSE;
1929   a->barray       = 0;
1930   a->rstart_bs    = oldmat->rstart_bs;
1931   a->rend_bs      = oldmat->rend_bs;
1932   a->cstart_bs    = oldmat->cstart_bs;
1933   a->cend_bs      = oldmat->cend_bs;
1934 
1935   /* hash table stuff */
1936   a->ht           = 0;
1937   a->hd           = 0;
1938   a->ht_size      = 0;
1939   a->ht_flag      = oldmat->ht_flag;
1940   a->ht_fact      = oldmat->ht_fact;
1941   a->ht_total_ct  = 0;
1942   a->ht_insert_ct = 0;
1943 
1944   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1945   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1946   a->cowners    = a->rowners + a->size + 2;
1947   a->rowners_bs = a->cowners + a->size + 2;
1948   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
1949   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1950   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
1951   if (oldmat->colmap) {
1952 #if defined (PETSC_USE_CTABLE)
1953     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1954 #else
1955     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
1956     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
1957     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
1958 #endif
1959   } else a->colmap = 0;
1960   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1961     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
1962     PetscLogObjectMemory(mat,len*sizeof(int));
1963     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
1964   } else a->garray = 0;
1965 
1966   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1967   PetscLogObjectParent(mat,a->lvec);
1968   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1969 
1970   PetscLogObjectParent(mat,a->Mvctx);
1971   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1972   PetscLogObjectParent(mat,a->A);
1973   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1974   PetscLogObjectParent(mat,a->B);
1975   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
1976   *newmat = mat;
1977   PetscFunctionReturn(0);
1978 }
1979 
1980 #include "petscsys.h"
1981 
1982 EXTERN_C_BEGIN
1983 #undef __FUNCT__
1984 #define __FUNCT__ "MatLoad_MPISBAIJ"
1985 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
1986 {
1987   Mat          A;
1988   int          i,nz,ierr,j,rstart,rend,fd;
1989   PetscScalar  *vals,*buf;
1990   MPI_Comm     comm = ((PetscObject)viewer)->comm;
1991   MPI_Status   status;
1992   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
1993   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
1994   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
1995   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1996   int          dcount,kmax,k,nzcount,tmp;
1997 
1998   PetscFunctionBegin;
1999   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2000 
2001   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2002   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2003   if (!rank) {
2004     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2005     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2006     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2007     if (header[3] < 0) {
2008       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2009     }
2010   }
2011 
2012   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2013   M = header[1]; N = header[2];
2014 
2015   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2016 
2017   /*
2018      This code adds extra rows to make sure the number of rows is
2019      divisible by the blocksize
2020   */
2021   Mbs        = M/bs;
2022   extra_rows = bs - M + bs*(Mbs);
2023   if (extra_rows == bs) extra_rows = 0;
2024   else                  Mbs++;
2025   if (extra_rows &&!rank) {
2026     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2027   }
2028 
2029   /* determine ownership of all rows */
2030   mbs        = Mbs/size + ((Mbs % size) > rank);
2031   m          = mbs*bs;
2032   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2033   browners   = rowners + size + 1;
2034   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2035   rowners[0] = 0;
2036   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2037   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2038   rstart = rowners[rank];
2039   rend   = rowners[rank+1];
2040 
2041   /* distribute row lengths to all processors */
2042   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2043   if (!rank) {
2044     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2045     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2046     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2047     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2048     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2049     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2050     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2051   } else {
2052     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2053   }
2054 
2055   if (!rank) {   /* procs[0] */
2056     /* calculate the number of nonzeros on each processor */
2057     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2058     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2059     for (i=0; i<size; i++) {
2060       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2061         procsnz[i] += rowlengths[j];
2062       }
2063     }
2064     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2065 
2066     /* determine max buffer needed and allocate it */
2067     maxnz = 0;
2068     for (i=0; i<size; i++) {
2069       maxnz = PetscMax(maxnz,procsnz[i]);
2070     }
2071     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2072 
2073     /* read in my part of the matrix column indices  */
2074     nz     = procsnz[0];
2075     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2076     mycols = ibuf;
2077     if (size == 1)  nz -= extra_rows;
2078     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2079     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2080 
2081     /* read in every ones (except the last) and ship off */
2082     for (i=1; i<size-1; i++) {
2083       nz   = procsnz[i];
2084       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2085       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2086     }
2087     /* read in the stuff for the last proc */
2088     if (size != 1) {
2089       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2090       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2091       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2092       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2093     }
2094     ierr = PetscFree(cols);CHKERRQ(ierr);
2095   } else {  /* procs[i], i>0 */
2096     /* determine buffer space needed for message */
2097     nz = 0;
2098     for (i=0; i<m; i++) {
2099       nz += locrowlens[i];
2100     }
2101     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2102     mycols = ibuf;
2103     /* receive message of column indices*/
2104     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2105     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2106     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2107   }
2108 
2109   /* loop over local rows, determining number of off diagonal entries */
2110   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2111   odlens   = dlens + (rend-rstart);
2112   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2113   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2114   masked1  = mask    + Mbs;
2115   masked2  = masked1 + Mbs;
2116   rowcount = 0; nzcount = 0;
2117   for (i=0; i<mbs; i++) {
2118     dcount  = 0;
2119     odcount = 0;
2120     for (j=0; j<bs; j++) {
2121       kmax = locrowlens[rowcount];
2122       for (k=0; k<kmax; k++) {
2123         tmp = mycols[nzcount++]/bs; /* block col. index */
2124         if (!mask[tmp]) {
2125           mask[tmp] = 1;
2126           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2127           else masked1[dcount++] = tmp; /* entry in diag portion */
2128         }
2129       }
2130       rowcount++;
2131     }
2132 
2133     dlens[i]  = dcount;  /* d_nzz[i] */
2134     odlens[i] = odcount; /* o_nzz[i] */
2135 
2136     /* zero out the mask elements we set */
2137     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2138     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2139   }
2140 
2141   /* create our matrix */
2142   ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,0,dlens,0,odlens,newmat);
2143   CHKERRQ(ierr);
2144   A = *newmat;
2145   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2146 
2147   if (!rank) {
2148     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2149     /* read in my part of the matrix numerical values  */
2150     nz = procsnz[0];
2151     vals = buf;
2152     mycols = ibuf;
2153     if (size == 1)  nz -= extra_rows;
2154     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2155     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2156 
2157     /* insert into matrix */
2158     jj      = rstart*bs;
2159     for (i=0; i<m; i++) {
2160       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2161       mycols += locrowlens[i];
2162       vals   += locrowlens[i];
2163       jj++;
2164     }
2165 
2166     /* read in other processors (except the last one) and ship out */
2167     for (i=1; i<size-1; i++) {
2168       nz   = procsnz[i];
2169       vals = buf;
2170       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2171       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2172     }
2173     /* the last proc */
2174     if (size != 1){
2175       nz   = procsnz[i] - extra_rows;
2176       vals = buf;
2177       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2178       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2179       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2180     }
2181     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2182 
2183   } else {
2184     /* receive numeric values */
2185     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2186 
2187     /* receive message of values*/
2188     vals   = buf;
2189     mycols = ibuf;
2190     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2191     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2192     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2193 
2194     /* insert into matrix */
2195     jj      = rstart*bs;
2196     for (i=0; i<m; i++) {
2197       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2198       mycols += locrowlens[i];
2199       vals   += locrowlens[i];
2200       jj++;
2201     }
2202   }
2203 
2204   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2205   ierr = PetscFree(buf);CHKERRQ(ierr);
2206   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2207   ierr = PetscFree(rowners);CHKERRQ(ierr);
2208   ierr = PetscFree(dlens);CHKERRQ(ierr);
2209   ierr = PetscFree(mask);CHKERRQ(ierr);
2210   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2211   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2212   PetscFunctionReturn(0);
2213 }
2214 EXTERN_C_END
2215 
2216 #undef __FUNCT__
2217 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2218 /*@
2219    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2220 
2221    Input Parameters:
2222 .  mat  - the matrix
2223 .  fact - factor
2224 
2225    Collective on Mat
2226 
2227    Level: advanced
2228 
2229   Notes:
2230    This can also be set by the command line option: -mat_use_hash_table fact
2231 
2232 .keywords: matrix, hashtable, factor, HT
2233 
2234 .seealso: MatSetOption()
2235 @*/
2236 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2237 {
2238   PetscFunctionBegin;
2239   SETERRQ(1,"Function not yet written for SBAIJ format");
2240   /* PetscFunctionReturn(0); */
2241 }
2242 
2243 #undef __FUNCT__
2244 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2245 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2246 {
2247   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2248   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2249   PetscReal    atmp;
2250   PetscReal    *work,*svalues,*rvalues;
2251   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2252   int          rank,size,*rowners_bs,dest,count,source;
2253   PetscScalar  *va;
2254   MatScalar    *ba;
2255   MPI_Status   stat;
2256 
2257   PetscFunctionBegin;
2258   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2259   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2260 
2261   ierr = MPI_Comm_size(PETSC_COMM_WORLD,&size);CHKERRQ(ierr);
2262   ierr = MPI_Comm_rank(PETSC_COMM_WORLD,&rank);CHKERRQ(ierr);
2263 
2264   bs   = a->bs;
2265   mbs  = a->mbs;
2266   Mbs  = a->Mbs;
2267   ba   = b->a;
2268   bi   = b->i;
2269   bj   = b->j;
2270   /*
2271   PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2272   PetscSynchronizedFlush(PETSC_COMM_WORLD);
2273   */
2274 
2275   /* find ownerships */
2276   rowners_bs = a->rowners_bs;
2277   /*
2278   if (!rank){
2279     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2280   }
2281   */
2282 
2283   /* each proc creates an array to be distributed */
2284   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2285   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2286 
2287   /* row_max for B */
2288   if (rank != size-1){
2289     for (i=0; i<mbs; i++) {
2290       ncols = bi[1] - bi[0]; bi++;
2291       brow  = bs*i;
2292       for (j=0; j<ncols; j++){
2293         bcol = bs*(*bj);
2294         for (kcol=0; kcol<bs; kcol++){
2295           col = bcol + kcol;                 /* local col index */
2296           col += rowners_bs[rank+1];      /* global col index */
2297           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2298           for (krow=0; krow<bs; krow++){
2299             atmp = PetscAbsScalar(*ba); ba++;
2300             row = brow + krow;    /* local row index */
2301             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2302             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2303             if (work[col] < atmp) work[col] = atmp;
2304           }
2305         }
2306         bj++;
2307       }
2308     }
2309     /*
2310       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2311       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2312       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2313       */
2314 
2315     /* send values to its owners */
2316     for (dest=rank+1; dest<size; dest++){
2317       svalues = work + rowners_bs[dest];
2318       count   = rowners_bs[dest+1]-rowners_bs[dest];
2319       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,PETSC_COMM_WORLD);CHKERRQ(ierr);
2320       /*
2321       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2322       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2323       */
2324     }
2325   }
2326 
2327   /* receive values */
2328   if (rank){
2329     rvalues = work;
2330     count   = rowners_bs[rank+1]-rowners_bs[rank];
2331     for (source=0; source<rank; source++){
2332       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,PETSC_COMM_WORLD,&stat);CHKERRQ(ierr);
2333       /* process values */
2334       for (i=0; i<count; i++){
2335         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2336       }
2337       /*
2338       PetscSynchronizedPrintf(PETSC_COMM_WORLD,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2339       PetscSynchronizedFlush(PETSC_COMM_WORLD);
2340       */
2341     }
2342   }
2343 
2344   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2345   ierr = PetscFree(work);CHKERRQ(ierr);
2346   PetscFunctionReturn(0);
2347 }
2348 
2349 #undef __FUNCT__
2350 #define __FUNCT__ "MatRelax_MPISBAIJ"
2351 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2352 {
2353   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2354   int            ierr;
2355   PetscScalar    mone=-1.0;
2356   Vec            lvec1,bb1;
2357 
2358   PetscFunctionBegin;
2359   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2360   if (mat->bs > 1)
2361     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2362 
2363   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2364     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2365       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
2366       its--;
2367     }
2368 
2369     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2370     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2371     while (its--){
2372       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2373 
2374       /* lower diagonal part: bb1 = bb - B^T*xx */
2375       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2376       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2377 
2378       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2379       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2380       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2381 
2382       /* upper diagonal part: bb1 = bb1 - B*x */
2383       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2384       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2385 
2386       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2387 
2388       /* diagonal sweep */
2389       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,PETSC_NULL,xx);CHKERRQ(ierr);
2390     }
2391     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2392     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2393   } else {
2394     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2395   }
2396   PetscFunctionReturn(0);
2397 }
2398 
2399