xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 899d7b4fa70258d434a4457e69be066340eef5eb)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "src/vec/vecimpl.h"
5 #include "mpisbaij.h"
6 #include "src/mat/impls/sbaij/seq/sbaij.h"
7 
8 extern int MatSetUpMultiply_MPISBAIJ(Mat);
9 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
10 extern int DisAssemble_MPISBAIJ(Mat);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
13 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
14 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
15 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
16 extern int MatPrintHelp_SeqSBAIJ(Mat);
17 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
18 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
19 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
20 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
21 
22 /*  UGLY, ugly, ugly
23    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
24    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
25    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
26    converts the entries into single precision and then calls ..._MatScalar() to put them
27    into the single precision data structures.
28 */
29 #if defined(PETSC_USE_MAT_SINGLE)
30 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
31 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
32 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 #else
36 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
37 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
38 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
39 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
40 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
41 #endif
42 
43 EXTERN_C_BEGIN
44 #undef __FUNCT__
45 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
46 int MatStoreValues_MPISBAIJ(Mat mat)
47 {
48   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
49   int          ierr;
50 
51   PetscFunctionBegin;
52   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
53   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
54   PetscFunctionReturn(0);
55 }
56 EXTERN_C_END
57 
58 EXTERN_C_BEGIN
59 #undef __FUNCT__
60 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
61 int MatRetrieveValues_MPISBAIJ(Mat mat)
62 {
63   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
64   int          ierr;
65 
66   PetscFunctionBegin;
67   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
68   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
69   PetscFunctionReturn(0);
70 }
71 EXTERN_C_END
72 
73 /*
74      Local utility routine that creates a mapping from the global column
75    number to the local number in the off-diagonal part of the local
76    storage of the matrix.  This is done in a non scable way since the
77    length of colmap equals the global matrix length.
78 */
79 #undef __FUNCT__
80 #define __FUNCT__ "CreateColmap_MPISBAIJ_Private"
81 static int CreateColmap_MPISBAIJ_Private(Mat mat)
82 {
83   PetscFunctionBegin;
84   SETERRQ(1,"Function not yet written for SBAIJ format");
85   /* PetscFunctionReturn(0); */
86 }
87 
88 #define CHUNKSIZE  10
89 
90 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
91 { \
92  \
93     brow = row/bs;  \
94     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
95     rmax = aimax[brow]; nrow = ailen[brow]; \
96       bcol = col/bs; \
97       ridx = row % bs; cidx = col % bs; \
98       low = 0; high = nrow; \
99       while (high-low > 3) { \
100         t = (low+high)/2; \
101         if (rp[t] > bcol) high = t; \
102         else              low  = t; \
103       } \
104       for (_i=low; _i<high; _i++) { \
105         if (rp[_i] > bcol) break; \
106         if (rp[_i] == bcol) { \
107           bap  = ap +  bs2*_i + bs*cidx + ridx; \
108           if (addv == ADD_VALUES) *bap += value;  \
109           else                    *bap  = value;  \
110           goto a_noinsert; \
111         } \
112       } \
113       if (a->nonew == 1) goto a_noinsert; \
114       else if (a->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
115       if (nrow >= rmax) { \
116         /* there is no extra room in row, therefore enlarge */ \
117         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
118         MatScalar *new_a; \
119  \
120         if (a->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
121  \
122         /* malloc new storage space */ \
123         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
124         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
125         new_j = (int*)(new_a + bs2*new_nz); \
126         new_i = new_j + new_nz; \
127  \
128         /* copy over old data into new slots */ \
129         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
130         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
131         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
132         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
133         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
134         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
135         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(PetscScalar));CHKERRQ(ierr); \
136         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
137                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
138         /* free up old matrix storage */ \
139         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
140         if (!a->singlemalloc) { \
141           ierr = PetscFree(a->i);CHKERRQ(ierr); \
142           ierr = PetscFree(a->j);CHKERRQ(ierr);\
143         } \
144         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
145         a->singlemalloc = PETSC_TRUE; \
146  \
147         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
148         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
149         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
150         a->s_maxnz += bs2*CHUNKSIZE; \
151         a->reallocs++; \
152         a->s_nz++; \
153       } \
154       N = nrow++ - 1;  \
155       /* shift up all the later entries in this row */ \
156       for (ii=N; ii>=_i; ii--) { \
157         rp[ii+1] = rp[ii]; \
158         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
159       } \
160       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
161       rp[_i]                      = bcol;  \
162       ap[bs2*_i + bs*cidx + ridx] = value;  \
163       a_noinsert:; \
164     ailen[brow] = nrow; \
165 }
166 #ifndef MatSetValues_SeqBAIJ_B_Private
167 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
168 { \
169     brow = row/bs;  \
170     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
171     rmax = bimax[brow]; nrow = bilen[brow]; \
172       bcol = col/bs; \
173       ridx = row % bs; cidx = col % bs; \
174       low = 0; high = nrow; \
175       while (high-low > 3) { \
176         t = (low+high)/2; \
177         if (rp[t] > bcol) high = t; \
178         else              low  = t; \
179       } \
180       for (_i=low; _i<high; _i++) { \
181         if (rp[_i] > bcol) break; \
182         if (rp[_i] == bcol) { \
183           bap  = ap +  bs2*_i + bs*cidx + ridx; \
184           if (addv == ADD_VALUES) *bap += value;  \
185           else                    *bap  = value;  \
186           goto b_noinsert; \
187         } \
188       } \
189       if (b->nonew == 1) goto b_noinsert; \
190       else if (b->nonew == -1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero into matrix"); \
191       if (nrow >= rmax) { \
192         /* there is no extra room in row, therefore enlarge */ \
193         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
194         MatScalar *new_a; \
195  \
196         if (b->nonew == -2) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero in the matrix"); \
197  \
198         /* malloc new storage space */ \
199         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
200         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
201         new_j = (int*)(new_a + bs2*new_nz); \
202         new_i = new_j + new_nz; \
203  \
204         /* copy over old data into new slots */ \
205         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
206         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
207         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
208         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
209         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
210         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
211         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
212         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
213                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
214         /* free up old matrix storage */ \
215         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
216         if (!b->singlemalloc) { \
217           ierr = PetscFree(b->i);CHKERRQ(ierr); \
218           ierr = PetscFree(b->j);CHKERRQ(ierr); \
219         } \
220         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
221         b->singlemalloc = PETSC_TRUE; \
222  \
223         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
224         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
225         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
226         b->maxnz += bs2*CHUNKSIZE; \
227         b->reallocs++; \
228         b->nz++; \
229       } \
230       N = nrow++ - 1;  \
231       /* shift up all the later entries in this row */ \
232       for (ii=N; ii>=_i; ii--) { \
233         rp[ii+1] = rp[ii]; \
234         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
235       } \
236       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
237       rp[_i]                      = bcol;  \
238       ap[bs2*_i + bs*cidx + ridx] = value;  \
239       b_noinsert:; \
240     bilen[brow] = nrow; \
241 }
242 #endif
243 
244 #if defined(PETSC_USE_MAT_SINGLE)
245 #undef __FUNCT__
246 #define __FUNCT__ "MatSetValues_MPISBAIJ"
247 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
248 {
249   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
250   int          ierr,i,N = m*n;
251   MatScalar    *vsingle;
252 
253   PetscFunctionBegin;
254   if (N > b->setvalueslen) {
255     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
256     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
257     b->setvalueslen  = N;
258   }
259   vsingle = b->setvaluescopy;
260 
261   for (i=0; i<N; i++) {
262     vsingle[i] = v[i];
263   }
264   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
265   PetscFunctionReturn(0);
266 }
267 
268 #undef __FUNCT__
269 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
270 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
271 {
272   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
273   int         ierr,i,N = m*n*b->bs2;
274   MatScalar   *vsingle;
275 
276   PetscFunctionBegin;
277   if (N > b->setvalueslen) {
278     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
279     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
280     b->setvalueslen  = N;
281   }
282   vsingle = b->setvaluescopy;
283   for (i=0; i<N; i++) {
284     vsingle[i] = v[i];
285   }
286   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
287   PetscFunctionReturn(0);
288 }
289 
290 #undef __FUNCT__
291 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
292 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
293 {
294   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
295   int         ierr,i,N = m*n;
296   MatScalar   *vsingle;
297 
298   PetscFunctionBegin;
299   SETERRQ(1,"Function not yet written for SBAIJ format");
300   /* PetscFunctionReturn(0); */
301 }
302 
303 #undef __FUNCT__
304 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
305 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
306 {
307   Mat_MPIBAIJ *b = (Mat_MPIBAIJ*)mat->data;
308   int         ierr,i,N = m*n*b->bs2;
309   MatScalar   *vsingle;
310 
311   PetscFunctionBegin;
312   SETERRQ(1,"Function not yet written for SBAIJ format");
313   /* PetscFunctionReturn(0); */
314 }
315 #endif
316 
317 /* Only add/insert a(i,j) with i<=j (blocks).
318    Any a(i,j) with i>j input by user is ingored.
319 */
320 #undef __FUNCT__
321 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
322 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
323 {
324   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
325   MatScalar    value;
326   PetscTruth   roworiented = baij->roworiented;
327   int          ierr,i,j,row,col;
328   int          rstart_orig=baij->rstart_bs;
329   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
330   int          cend_orig=baij->cend_bs,bs=baij->bs;
331 
332   /* Some Variables required in the macro */
333   Mat          A = baij->A;
334   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
335   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
336   MatScalar    *aa=a->a;
337 
338   Mat          B = baij->B;
339   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
340   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
341   MatScalar    *ba=b->a;
342 
343   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
344   int          low,high,t,ridx,cidx,bs2=a->bs2;
345   MatScalar    *ap,*bap;
346 
347   /* for stash */
348   int          n_loc, *in_loc=0;
349   MatScalar    *v_loc=0;
350 
351   PetscFunctionBegin;
352 
353   if(!baij->donotstash){
354     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
355     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
356   }
357 
358   for (i=0; i<m; i++) {
359     if (im[i] < 0) continue;
360 #if defined(PETSC_USE_BOPT_g)
361     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
362 #endif
363     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
364       row = im[i] - rstart_orig;              /* local row index */
365       for (j=0; j<n; j++) {
366         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
367         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
368           col = in[j] - cstart_orig;          /* local col index */
369           brow = row/bs; bcol = col/bs;
370           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
371           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
372           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
373           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
374         } else if (in[j] < 0) continue;
375 #if defined(PETSC_USE_BOPT_g)
376         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
377 #endif
378         else {  /* off-diag entry (B) */
379           if (mat->was_assembled) {
380             if (!baij->colmap) {
381               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
382             }
383 #if defined (PETSC_USE_CTABLE)
384             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
385             col  = col - 1;
386 #else
387             col = baij->colmap[in[j]/bs] - 1;
388 #endif
389             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
390               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
391               col =  in[j];
392               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
393               B = baij->B;
394               b = (Mat_SeqBAIJ*)(B)->data;
395               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
396               ba=b->a;
397             } else col += in[j]%bs;
398           } else col = in[j];
399           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
400           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
401           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
402         }
403       }
404     } else {  /* off processor entry */
405       if (!baij->donotstash) {
406         n_loc = 0;
407         for (j=0; j<n; j++){
408           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
409           in_loc[n_loc] = in[j];
410           if (roworiented) {
411             v_loc[n_loc] = v[i*n+j];
412           } else {
413             v_loc[n_loc] = v[j*m+i];
414           }
415           n_loc++;
416         }
417         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
418       }
419     }
420   }
421 
422   if(!baij->donotstash){
423     ierr = PetscFree(in_loc);CHKERRQ(ierr);
424     ierr = PetscFree(v_loc);CHKERRQ(ierr);
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
431 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
432 {
433   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
434   const MatScalar *value;
435   MatScalar       *barray=baij->barray;
436   PetscTruth      roworiented = baij->roworiented;
437   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
438   int             rend=baij->rend,cstart=baij->cstart,stepval;
439   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
440 
441   PetscFunctionBegin;
442   if(!barray) {
443     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
444     baij->barray = barray;
445   }
446 
447   if (roworiented) {
448     stepval = (n-1)*bs;
449   } else {
450     stepval = (m-1)*bs;
451   }
452   for (i=0; i<m; i++) {
453     if (im[i] < 0) continue;
454 #if defined(PETSC_USE_BOPT_g)
455     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
456 #endif
457     if (im[i] >= rstart && im[i] < rend) {
458       row = im[i] - rstart;
459       for (j=0; j<n; j++) {
460         /* If NumCol = 1 then a copy is not required */
461         if ((roworiented) && (n == 1)) {
462           barray = (MatScalar*) v + i*bs2;
463         } else if((!roworiented) && (m == 1)) {
464           barray = (MatScalar*) v + j*bs2;
465         } else { /* Here a copy is required */
466           if (roworiented) {
467             value = v + i*(stepval+bs)*bs + j*bs;
468           } else {
469             value = v + j*(stepval+bs)*bs + i*bs;
470           }
471           for (ii=0; ii<bs; ii++,value+=stepval) {
472             for (jj=0; jj<bs; jj++) {
473               *barray++  = *value++;
474             }
475           }
476           barray -=bs2;
477         }
478 
479         if (in[j] >= cstart && in[j] < cend){
480           col  = in[j] - cstart;
481           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
482         }
483         else if (in[j] < 0) continue;
484 #if defined(PETSC_USE_BOPT_g)
485         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
486 #endif
487         else {
488           if (mat->was_assembled) {
489             if (!baij->colmap) {
490               ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
491             }
492 
493 #if defined(PETSC_USE_BOPT_g)
494 #if defined (PETSC_USE_CTABLE)
495             { int data;
496               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
497               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
498             }
499 #else
500             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
501 #endif
502 #endif
503 #if defined (PETSC_USE_CTABLE)
504 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
505             col  = (col - 1)/bs;
506 #else
507             col = (baij->colmap[in[j]] - 1)/bs;
508 #endif
509             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
510               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
511               col =  in[j];
512             }
513           }
514           else col = in[j];
515           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
516         }
517       }
518     } else {
519       if (!baij->donotstash) {
520         if (roworiented) {
521           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
522         } else {
523           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
524         }
525       }
526     }
527   }
528   PetscFunctionReturn(0);
529 }
530 
531 #define HASH_KEY 0.6180339887
532 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
533 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
534 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
535 #undef __FUNCT__
536 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
537 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
538 {
539   PetscFunctionBegin;
540   SETERRQ(1,"Function not yet written for SBAIJ format");
541   /* PetscFunctionReturn(0); */
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
546 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
547 {
548   PetscFunctionBegin;
549   SETERRQ(1,"Function not yet written for SBAIJ format");
550   /* PetscFunctionReturn(0); */
551 }
552 
553 #undef __FUNCT__
554 #define __FUNCT__ "MatGetValues_MPISBAIJ"
555 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
556 {
557   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
558   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
559   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
560 
561   PetscFunctionBegin;
562   for (i=0; i<m; i++) {
563     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
564     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
565     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
566       row = idxm[i] - bsrstart;
567       for (j=0; j<n; j++) {
568         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
569         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
570         if (idxn[j] >= bscstart && idxn[j] < bscend){
571           col = idxn[j] - bscstart;
572           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
573         } else {
574           if (!baij->colmap) {
575             ierr = CreateColmap_MPISBAIJ_Private(mat);CHKERRQ(ierr);
576           }
577 #if defined (PETSC_USE_CTABLE)
578           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
579           data --;
580 #else
581           data = baij->colmap[idxn[j]/bs]-1;
582 #endif
583           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
584           else {
585             col  = data + idxn[j]%bs;
586             ierr = MatGetValues_SeqSBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
587           }
588         }
589       }
590     } else {
591       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
592     }
593   }
594  PetscFunctionReturn(0);
595 }
596 
597 #undef __FUNCT__
598 #define __FUNCT__ "MatNorm_MPISBAIJ"
599 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
600 {
601   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
602   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
603   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
604   int        ierr;
605   PetscReal  sum[2],*lnorm2;
606 
607   PetscFunctionBegin;
608   if (baij->size == 1) {
609     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
610   } else {
611     if (type == NORM_FROBENIUS) {
612       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
613       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
614       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
615       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
616       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
617       /*
618       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
619       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
620       */
621       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
622       /*
623       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
624       PetscSynchronizedFlush(mat->comm); */
625 
626       *norm = sqrt(sum[0] + 2*sum[1]);
627       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
628     } else {
629       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
630     }
631   }
632   PetscFunctionReturn(0);
633 }
634 
635 /*
636   Creates the hash table, and sets the table
637   This table is created only once.
638   If new entried need to be added to the matrix
639   then the hash table has to be destroyed and
640   recreated.
641 */
642 #undef __FUNCT__
643 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
644 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
645 {
646   PetscFunctionBegin;
647   SETERRQ(1,"Function not yet written for SBAIJ format");
648   /* PetscFunctionReturn(0); */
649 }
650 
651 #undef __FUNCT__
652 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
653 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
654 {
655   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
656   int         ierr,nstash,reallocs;
657   InsertMode  addv;
658 
659   PetscFunctionBegin;
660   if (baij->donotstash) {
661     PetscFunctionReturn(0);
662   }
663 
664   /* make sure all processors are either in INSERTMODE or ADDMODE */
665   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
666   if (addv == (ADD_VALUES|INSERT_VALUES)) {
667     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
668   }
669   mat->insertmode = addv; /* in case this processor had no cache */
670 
671   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
672   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
673   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
674   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
675   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
676   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
677   PetscFunctionReturn(0);
678 }
679 
680 #undef __FUNCT__
681 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
682 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
683 {
684   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
685   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
686   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
687   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
688   int         *row,*col,other_disassembled;
689   PetscTruth  r1,r2,r3;
690   MatScalar   *val;
691   InsertMode  addv = mat->insertmode;
692 #if defined(PETSC_HAVE_MUMPS)
693   PetscTruth  flag;
694 #endif
695 
696   PetscFunctionBegin;
697 
698   if (!baij->donotstash) {
699     while (1) {
700       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
701       /*
702       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
703       PetscSynchronizedFlush(mat->comm);
704       */
705       if (!flg) break;
706 
707       for (i=0; i<n;) {
708         /* Now identify the consecutive vals belonging to the same row */
709         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
710         if (j < n) ncols = j-i;
711         else       ncols = n-i;
712         /* Now assemble all these values with a single function call */
713         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
714         i = j;
715       }
716     }
717     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
718     /* Now process the block-stash. Since the values are stashed column-oriented,
719        set the roworiented flag to column oriented, and after MatSetValues()
720        restore the original flags */
721     r1 = baij->roworiented;
722     r2 = a->roworiented;
723     r3 = b->roworiented;
724     baij->roworiented = PETSC_FALSE;
725     a->roworiented    = PETSC_FALSE;
726     b->roworiented    = PETSC_FALSE;
727     while (1) {
728       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
729       if (!flg) break;
730 
731       for (i=0; i<n;) {
732         /* Now identify the consecutive vals belonging to the same row */
733         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
734         if (j < n) ncols = j-i;
735         else       ncols = n-i;
736         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
737         i = j;
738       }
739     }
740     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
741     baij->roworiented = r1;
742     a->roworiented    = r2;
743     b->roworiented    = r3;
744   }
745 
746   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
747   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
748 
749   /* determine if any processor has disassembled, if so we must
750      also disassemble ourselfs, in order that we may reassemble. */
751   /*
752      if nonzero structure of submatrix B cannot change then we know that
753      no processor disassembled thus we can skip this stuff
754   */
755   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
756     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
757     if (mat->was_assembled && !other_disassembled) {
758       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
759     }
760   }
761 
762   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
763     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
764   }
765   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
766   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
767 
768 #if defined(PETSC_USE_BOPT_g)
769   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
770     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
771     baij->ht_total_ct  = 0;
772     baij->ht_insert_ct = 0;
773   }
774 #endif
775   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
776     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
777     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
778     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
779   }
780 
781   if (baij->rowvalues) {
782     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
783     baij->rowvalues = 0;
784   }
785 
786 #if defined(PETSC_HAVE_MUMPS)
787   ierr = PetscOptionsHasName(mat->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr);
788   if (flag) { ierr = MatUseMUMPS_MPIAIJ(mat);CHKERRQ(ierr); }
789 #endif
790   PetscFunctionReturn(0);
791 }
792 
793 #undef __FUNCT__
794 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
795 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
796 {
797   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
798   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
799   PetscTruth        isascii,isdraw;
800   PetscViewer       sviewer;
801   PetscViewerFormat format;
802 
803   PetscFunctionBegin;
804   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
805   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
806   if (isascii) {
807     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
808     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
809       MatInfo info;
810       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
811       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
812       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
813               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
814               baij->bs,(int)info.memory);CHKERRQ(ierr);
815       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
816       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
817       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
818       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
819       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
820       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
821       PetscFunctionReturn(0);
822     } else if (format == PETSC_VIEWER_ASCII_INFO) {
823       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
824       PetscFunctionReturn(0);
825     }
826   }
827 
828   if (isdraw) {
829     PetscDraw       draw;
830     PetscTruth isnull;
831     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
832     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
833   }
834 
835   if (size == 1) {
836     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
837     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
838   } else {
839     /* assemble the entire matrix onto first processor. */
840     Mat         A;
841     Mat_SeqSBAIJ *Aloc;
842     Mat_SeqBAIJ *Bloc;
843     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
844     MatScalar   *a;
845 
846     if (!rank) {
847       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
848     } else {
849       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
850     }
851     PetscLogObjectParent(mat,A);
852 
853     /* copy over the A part */
854     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
855     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
856     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
857 
858     for (i=0; i<mbs; i++) {
859       rvals[0] = bs*(baij->rstart + i);
860       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
861       for (j=ai[i]; j<ai[i+1]; j++) {
862         col = (baij->cstart+aj[j])*bs;
863         for (k=0; k<bs; k++) {
864           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
865           col++; a += bs;
866         }
867       }
868     }
869     /* copy over the B part */
870     Bloc = (Mat_SeqBAIJ*)baij->B->data;
871     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
872     for (i=0; i<mbs; i++) {
873       rvals[0] = bs*(baij->rstart + i);
874       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
875       for (j=ai[i]; j<ai[i+1]; j++) {
876         col = baij->garray[aj[j]]*bs;
877         for (k=0; k<bs; k++) {
878           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
879           col++; a += bs;
880         }
881       }
882     }
883     ierr = PetscFree(rvals);CHKERRQ(ierr);
884     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
885     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
886     /*
887        Everyone has to call to draw the matrix since the graphics waits are
888        synchronized across all processors that share the PetscDraw object
889     */
890     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
891     if (!rank) {
892       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
893       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
894     }
895     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
896     ierr = MatDestroy(A);CHKERRQ(ierr);
897   }
898   PetscFunctionReturn(0);
899 }
900 
901 #undef __FUNCT__
902 #define __FUNCT__ "MatView_MPISBAIJ"
903 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
904 {
905   int        ierr;
906   PetscTruth isascii,isdraw,issocket,isbinary;
907 
908   PetscFunctionBegin;
909   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
910   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
911   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
912   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
913   if (isascii || isdraw || issocket || isbinary) {
914     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
915   } else {
916     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
917   }
918   PetscFunctionReturn(0);
919 }
920 
921 #undef __FUNCT__
922 #define __FUNCT__ "MatDestroy_MPISBAIJ"
923 int MatDestroy_MPISBAIJ(Mat mat)
924 {
925   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
926   int         ierr;
927 
928   PetscFunctionBegin;
929 #if defined(PETSC_USE_LOG)
930   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
931 #endif
932   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
933   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
934   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
935   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
936   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
937 #if defined (PETSC_USE_CTABLE)
938   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
939 #else
940   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
941 #endif
942   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
943   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
944   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
945   if (baij->slvec0) {
946     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
947     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
948   }
949   if (baij->slvec1) {
950     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
951     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
952     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
953   }
954   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
955   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
956   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
957   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
958 #if defined(PETSC_USE_MAT_SINGLE)
959   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
960 #endif
961   ierr = PetscFree(baij);CHKERRQ(ierr);
962   PetscFunctionReturn(0);
963 }
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMult_MPISBAIJ"
967 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
970   int         ierr,nt,mbs=a->mbs,bs=a->bs;
971   PetscScalar *x,*from,zero=0.0;
972 
973   PetscFunctionBegin;
974   /*
975   PetscSynchronizedPrintf(A->comm," _1comm is called ...\n");
976   PetscSynchronizedFlush(A->comm);
977   */
978   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
979   if (nt != A->n) {
980     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
981   }
982   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
983   if (nt != A->m) {
984     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
985   }
986 
987   /* diagonal part */
988   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
989   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
990 
991   /* subdiagonal part */
992   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
993 
994   /* copy x into the vec slvec0 */
995   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
996   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
997   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
998   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
999 
1000   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1001   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1003 
1004   /* supperdiagonal part */
1005   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
1006 
1007   PetscFunctionReturn(0);
1008 }
1009 
1010 #undef __FUNCT__
1011 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
1012 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
1013 {
1014   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1015   int         ierr,nt;
1016 
1017   PetscFunctionBegin;
1018   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
1019   if (nt != A->n) {
1020     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
1021   }
1022   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1023   if (nt != A->m) {
1024     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
1025   }
1026 
1027   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1028   /* do diagonal part */
1029   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1030   /* do supperdiagonal part */
1031   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1032   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1033   /* do subdiagonal part */
1034   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1035   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1036   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1037 
1038   PetscFunctionReturn(0);
1039 }
1040 
1041 #undef __FUNCT__
1042 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1043 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1044 {
1045   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1046   int          ierr,mbs=a->mbs,bs=a->bs;
1047   PetscScalar  *x,*from,zero=0.0;
1048 
1049   PetscFunctionBegin;
1050   /*
1051   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1052   PetscSynchronizedFlush(A->comm);
1053   */
1054   /* diagonal part */
1055   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1056   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1057 
1058   /* subdiagonal part */
1059   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1060 
1061   /* copy x into the vec slvec0 */
1062   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1063   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1064   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1065   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1066 
1067   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1068   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1069   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1070 
1071   /* supperdiagonal part */
1072   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1073 
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1079 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1080 {
1081   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1082   int        ierr;
1083 
1084   PetscFunctionBegin;
1085   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1086   /* do diagonal part */
1087   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1088   /* do supperdiagonal part */
1089   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1090   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1091 
1092   /* do subdiagonal part */
1093   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1094   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1095   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1096 
1097   PetscFunctionReturn(0);
1098 }
1099 
1100 #undef __FUNCT__
1101 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1102 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1103 {
1104   PetscFunctionBegin;
1105   SETERRQ(1,"Matrix is symmetric. Call MatMult().");
1106   /* PetscFunctionReturn(0); */
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1111 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1112 {
1113   PetscFunctionBegin;
1114   SETERRQ(1,"Matrix is symmetric. Call MatMultAdd().");
1115   /* PetscFunctionReturn(0); */
1116 }
1117 
1118 /*
1119   This only works correctly for square matrices where the subblock A->A is the
1120    diagonal block
1121 */
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1124 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1125 {
1126   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1127   int         ierr;
1128 
1129   PetscFunctionBegin;
1130   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1131   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatScale_MPISBAIJ"
1137 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1138 {
1139   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1140   int         ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1144   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1150 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1151 {
1152   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1153   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1154   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1155   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1156   int            *cmap,*idx_p,cstart = mat->cstart;
1157 
1158   PetscFunctionBegin;
1159   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1160   mat->getrowactive = PETSC_TRUE;
1161 
1162   if (!mat->rowvalues && (idx || v)) {
1163     /*
1164         allocate enough space to hold information from the longest row.
1165     */
1166     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1167     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1168     int     max = 1,mbs = mat->mbs,tmp;
1169     for (i=0; i<mbs; i++) {
1170       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1171       if (max < tmp) { max = tmp; }
1172     }
1173     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1174     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1175   }
1176 
1177   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1178   lrow = row - brstart;  /* local row index */
1179 
1180   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1181   if (!v)   {pvA = 0; pvB = 0;}
1182   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1183   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1184   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1185   nztot = nzA + nzB;
1186 
1187   cmap  = mat->garray;
1188   if (v  || idx) {
1189     if (nztot) {
1190       /* Sort by increasing column numbers, assuming A and B already sorted */
1191       int imark = -1;
1192       if (v) {
1193         *v = v_p = mat->rowvalues;
1194         for (i=0; i<nzB; i++) {
1195           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1196           else break;
1197         }
1198         imark = i;
1199         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1200         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1201       }
1202       if (idx) {
1203         *idx = idx_p = mat->rowindices;
1204         if (imark > -1) {
1205           for (i=0; i<imark; i++) {
1206             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1207           }
1208         } else {
1209           for (i=0; i<nzB; i++) {
1210             if (cmap[cworkB[i]/bs] < cstart)
1211               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1212             else break;
1213           }
1214           imark = i;
1215         }
1216         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1217         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1218       }
1219     } else {
1220       if (idx) *idx = 0;
1221       if (v)   *v   = 0;
1222     }
1223   }
1224   *nz = nztot;
1225   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1226   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1227   PetscFunctionReturn(0);
1228 }
1229 
1230 #undef __FUNCT__
1231 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1232 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1233 {
1234   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1235 
1236   PetscFunctionBegin;
1237   if (baij->getrowactive == PETSC_FALSE) {
1238     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1239   }
1240   baij->getrowactive = PETSC_FALSE;
1241   PetscFunctionReturn(0);
1242 }
1243 
1244 #undef __FUNCT__
1245 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1246 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1247 {
1248   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1249 
1250   PetscFunctionBegin;
1251   *bs = baij->bs;
1252   PetscFunctionReturn(0);
1253 }
1254 
1255 #undef __FUNCT__
1256 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1257 int MatZeroEntries_MPISBAIJ(Mat A)
1258 {
1259   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1260   int         ierr;
1261 
1262   PetscFunctionBegin;
1263   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1264   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1265   PetscFunctionReturn(0);
1266 }
1267 
1268 #undef __FUNCT__
1269 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1270 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1271 {
1272   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1273   Mat         A = a->A,B = a->B;
1274   int         ierr;
1275   PetscReal   isend[5],irecv[5];
1276 
1277   PetscFunctionBegin;
1278   info->block_size     = (PetscReal)a->bs;
1279   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1280   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1281   isend[3] = info->memory;  isend[4] = info->mallocs;
1282   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1283   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1284   isend[3] += info->memory;  isend[4] += info->mallocs;
1285   if (flag == MAT_LOCAL) {
1286     info->nz_used      = isend[0];
1287     info->nz_allocated = isend[1];
1288     info->nz_unneeded  = isend[2];
1289     info->memory       = isend[3];
1290     info->mallocs      = isend[4];
1291   } else if (flag == MAT_GLOBAL_MAX) {
1292     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1293     info->nz_used      = irecv[0];
1294     info->nz_allocated = irecv[1];
1295     info->nz_unneeded  = irecv[2];
1296     info->memory       = irecv[3];
1297     info->mallocs      = irecv[4];
1298   } else if (flag == MAT_GLOBAL_SUM) {
1299     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1300     info->nz_used      = irecv[0];
1301     info->nz_allocated = irecv[1];
1302     info->nz_unneeded  = irecv[2];
1303     info->memory       = irecv[3];
1304     info->mallocs      = irecv[4];
1305   } else {
1306     SETERRQ1(1,"Unknown MatInfoType argument %d",flag);
1307   }
1308   info->rows_global       = (PetscReal)A->M;
1309   info->columns_global    = (PetscReal)A->N;
1310   info->rows_local        = (PetscReal)A->m;
1311   info->columns_local     = (PetscReal)A->N;
1312   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1313   info->fill_ratio_needed = 0;
1314   info->factor_mallocs    = 0;
1315   PetscFunctionReturn(0);
1316 }
1317 
1318 #undef __FUNCT__
1319 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1320 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1321 {
1322   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1323   int         ierr;
1324 
1325   PetscFunctionBegin;
1326   switch (op) {
1327   case MAT_NO_NEW_NONZERO_LOCATIONS:
1328   case MAT_YES_NEW_NONZERO_LOCATIONS:
1329   case MAT_COLUMNS_UNSORTED:
1330   case MAT_COLUMNS_SORTED:
1331   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1332   case MAT_KEEP_ZEROED_ROWS:
1333   case MAT_NEW_NONZERO_LOCATION_ERR:
1334     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1335     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1336     break;
1337   case MAT_ROW_ORIENTED:
1338     a->roworiented = PETSC_TRUE;
1339     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1340     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1341     break;
1342   case MAT_ROWS_SORTED:
1343   case MAT_ROWS_UNSORTED:
1344   case MAT_YES_NEW_DIAGONALS:
1345     PetscLogInfo(A,"Info:MatSetOption_MPIBAIJ:Option ignored\n");
1346     break;
1347   case MAT_COLUMN_ORIENTED:
1348     a->roworiented = PETSC_FALSE;
1349     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1350     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1351     break;
1352   case MAT_IGNORE_OFF_PROC_ENTRIES:
1353     a->donotstash = PETSC_TRUE;
1354     break;
1355   case MAT_NO_NEW_DIAGONALS:
1356     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1357   case MAT_USE_HASH_TABLE:
1358     a->ht_flag = PETSC_TRUE;
1359     break;
1360   default:
1361     SETERRQ(PETSC_ERR_SUP,"unknown option");
1362   }
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1368 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1369 {
1370   int ierr;
1371   PetscFunctionBegin;
1372   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1373   PetscFunctionReturn(0);
1374 }
1375 
1376 #undef __FUNCT__
1377 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1378 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1379 {
1380   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1381   Mat         a = baij->A,b = baij->B;
1382   int         ierr,s1,s2,s3;
1383 
1384   PetscFunctionBegin;
1385   if (ll != rr) {
1386     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1387   }
1388   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1389   if (rr) {
1390     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1391     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1392     /* Overlap communication with computation. */
1393     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1394     /*} if (ll) { */
1395     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1396     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1397     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1398     /* } */
1399   /* scale  the diagonal block */
1400   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1401 
1402   /* if (rr) { */
1403     /* Do a scatter end and then right scale the off-diagonal block */
1404     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1405     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1406   }
1407 
1408   PetscFunctionReturn(0);
1409 }
1410 
1411 #undef __FUNCT__
1412 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1413 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1414 {
1415   PetscFunctionBegin;
1416   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1417 }
1418 
1419 #undef __FUNCT__
1420 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1421 int MatPrintHelp_MPISBAIJ(Mat A)
1422 {
1423   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1424   MPI_Comm    comm = A->comm;
1425   static int  called = 0;
1426   int         ierr;
1427 
1428   PetscFunctionBegin;
1429   if (!a->rank) {
1430     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1431   }
1432   if (called) {PetscFunctionReturn(0);} else called = 1;
1433   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1434   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1435   PetscFunctionReturn(0);
1436 }
1437 
1438 #undef __FUNCT__
1439 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1440 int MatSetUnfactored_MPISBAIJ(Mat A)
1441 {
1442   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1443   int         ierr;
1444 
1445   PetscFunctionBegin;
1446   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1447   PetscFunctionReturn(0);
1448 }
1449 
1450 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1451 
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatEqual_MPISBAIJ"
1454 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1455 {
1456   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1457   Mat         a,b,c,d;
1458   PetscTruth  flg;
1459   int         ierr;
1460 
1461   PetscFunctionBegin;
1462   a = matA->A; b = matA->B;
1463   c = matB->A; d = matB->B;
1464 
1465   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1466   if (flg == PETSC_TRUE) {
1467     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1468   }
1469   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1470   PetscFunctionReturn(0);
1471 }
1472 
1473 #undef __FUNCT__
1474 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1475 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1476 {
1477   int        ierr;
1478 
1479   PetscFunctionBegin;
1480   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 /* -------------------------------------------------------------------*/
1484 static struct _MatOps MatOps_Values = {
1485   MatSetValues_MPISBAIJ,
1486   MatGetRow_MPISBAIJ,
1487   MatRestoreRow_MPISBAIJ,
1488   MatMult_MPISBAIJ,
1489   MatMultAdd_MPISBAIJ,
1490   MatMultTranspose_MPISBAIJ,
1491   MatMultTransposeAdd_MPISBAIJ,
1492   0,
1493   0,
1494   0,
1495   0,
1496   0,
1497   0,
1498   MatRelax_MPISBAIJ,
1499   MatTranspose_MPISBAIJ,
1500   MatGetInfo_MPISBAIJ,
1501   MatEqual_MPISBAIJ,
1502   MatGetDiagonal_MPISBAIJ,
1503   MatDiagonalScale_MPISBAIJ,
1504   MatNorm_MPISBAIJ,
1505   MatAssemblyBegin_MPISBAIJ,
1506   MatAssemblyEnd_MPISBAIJ,
1507   0,
1508   MatSetOption_MPISBAIJ,
1509   MatZeroEntries_MPISBAIJ,
1510   MatZeroRows_MPISBAIJ,
1511   0,
1512   0,
1513   0,
1514   0,
1515   MatSetUpPreallocation_MPISBAIJ,
1516   0,
1517   0,
1518   0,
1519   0,
1520   MatDuplicate_MPISBAIJ,
1521   0,
1522   0,
1523   0,
1524   0,
1525   0,
1526   0,
1527   0,
1528   MatGetValues_MPISBAIJ,
1529   0,
1530   MatPrintHelp_MPISBAIJ,
1531   MatScale_MPISBAIJ,
1532   0,
1533   0,
1534   0,
1535   MatGetBlockSize_MPISBAIJ,
1536   0,
1537   0,
1538   0,
1539   0,
1540   0,
1541   0,
1542   MatSetUnfactored_MPISBAIJ,
1543   0,
1544   MatSetValuesBlocked_MPISBAIJ,
1545   0,
1546   0,
1547   0,
1548   MatGetPetscMaps_Petsc,
1549   0,
1550   0,
1551   0,
1552   0,
1553   0,
1554   0,
1555   MatGetRowMax_MPISBAIJ};
1556 
1557 
1558 EXTERN_C_BEGIN
1559 #undef __FUNCT__
1560 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1561 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1562 {
1563   PetscFunctionBegin;
1564   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1565   *iscopy = PETSC_FALSE;
1566   PetscFunctionReturn(0);
1567 }
1568 EXTERN_C_END
1569 
1570 EXTERN_C_BEGIN
1571 #undef __FUNCT__
1572 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1573 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1574 {
1575   Mat_MPISBAIJ *b;
1576   int          ierr,i,mbs,Mbs;
1577 
1578   PetscFunctionBegin;
1579   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1580 
1581   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1582   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1583   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1584   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1585   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1586   if (d_nnz) {
1587     for (i=0; i<B->m/bs; i++) {
1588       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %d value %d",i,d_nnz[i]);
1589     }
1590   }
1591   if (o_nnz) {
1592     for (i=0; i<B->m/bs; i++) {
1593       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %d value %d",i,o_nnz[i]);
1594     }
1595   }
1596   B->preallocated = PETSC_TRUE;
1597   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1598   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1599   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1600   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1601 
1602   b   = (Mat_MPISBAIJ*)B->data;
1603   mbs = B->m/bs;
1604   Mbs = B->M/bs;
1605   if (mbs*bs != B->m) {
1606     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %d must be divisible by blocksize %d",B->m,bs);
1607   }
1608 
1609   b->bs  = bs;
1610   b->bs2 = bs*bs;
1611   b->mbs = mbs;
1612   b->nbs = mbs;
1613   b->Mbs = Mbs;
1614   b->Nbs = Mbs;
1615 
1616   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1617   b->rowners[0]    = 0;
1618   for (i=2; i<=b->size; i++) {
1619     b->rowners[i] += b->rowners[i-1];
1620   }
1621   b->rstart    = b->rowners[b->rank];
1622   b->rend      = b->rowners[b->rank+1];
1623   b->cstart    = b->rstart;
1624   b->cend      = b->rend;
1625   for (i=0; i<=b->size; i++) {
1626     b->rowners_bs[i] = b->rowners[i]*bs;
1627   }
1628   b->rstart_bs = b-> rstart*bs;
1629   b->rend_bs   = b->rend*bs;
1630 
1631   b->cstart_bs = b->cstart*bs;
1632   b->cend_bs   = b->cend*bs;
1633 
1634 
1635   ierr = MatCreateSeqSBAIJ(PETSC_COMM_SELF,bs,B->m,B->m,d_nz,d_nnz,&b->A);CHKERRQ(ierr);
1636   PetscLogObjectParent(B,b->A);
1637   ierr = MatCreateSeqBAIJ(PETSC_COMM_SELF,bs,B->m,B->M,o_nz,o_nnz,&b->B);CHKERRQ(ierr);
1638   PetscLogObjectParent(B,b->B);
1639 
1640   /* build cache for off array entries formed */
1641   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1642 
1643   PetscFunctionReturn(0);
1644 }
1645 EXTERN_C_END
1646 
1647 EXTERN_C_BEGIN
1648 #undef __FUNCT__
1649 #define __FUNCT__ "MatCreate_MPISBAIJ"
1650 int MatCreate_MPISBAIJ(Mat B)
1651 {
1652   Mat_MPISBAIJ *b;
1653   int          ierr;
1654   PetscTruth   flg;
1655 
1656   PetscFunctionBegin;
1657 
1658   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1659   B->data = (void*)b;
1660   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1661   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1662 
1663   B->ops->destroy    = MatDestroy_MPISBAIJ;
1664   B->ops->view       = MatView_MPISBAIJ;
1665   B->mapping    = 0;
1666   B->factor     = 0;
1667   B->assembled  = PETSC_FALSE;
1668 
1669   B->insertmode = NOT_SET_VALUES;
1670   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1671   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1672 
1673   /* build local table of row and column ownerships */
1674   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1675   b->cowners    = b->rowners + b->size + 2;
1676   b->rowners_bs = b->cowners + b->size + 2;
1677   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1678 
1679   /* build cache for off array entries formed */
1680   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1681   b->donotstash  = PETSC_FALSE;
1682   b->colmap      = PETSC_NULL;
1683   b->garray      = PETSC_NULL;
1684   b->roworiented = PETSC_TRUE;
1685 
1686 #if defined(PETSC_USE_MAT_SINGLE)
1687   /* stuff for MatSetValues_XXX in single precision */
1688   b->setvalueslen     = 0;
1689   b->setvaluescopy    = PETSC_NULL;
1690 #endif
1691 
1692   /* stuff used in block assembly */
1693   b->barray       = 0;
1694 
1695   /* stuff used for matrix vector multiply */
1696   b->lvec         = 0;
1697   b->Mvctx        = 0;
1698   b->slvec0       = 0;
1699   b->slvec0b      = 0;
1700   b->slvec1       = 0;
1701   b->slvec1a      = 0;
1702   b->slvec1b      = 0;
1703   b->sMvctx       = 0;
1704 
1705   /* stuff for MatGetRow() */
1706   b->rowindices   = 0;
1707   b->rowvalues    = 0;
1708   b->getrowactive = PETSC_FALSE;
1709 
1710   /* hash table stuff */
1711   b->ht           = 0;
1712   b->hd           = 0;
1713   b->ht_size      = 0;
1714   b->ht_flag      = PETSC_FALSE;
1715   b->ht_fact      = 0;
1716   b->ht_total_ct  = 0;
1717   b->ht_insert_ct = 0;
1718 
1719   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1720   if (flg) {
1721     PetscReal fact = 1.39;
1722     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1723     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1724     if (fact <= 1.0) fact = 1.39;
1725     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1726     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1727   }
1728   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1729                                      "MatStoreValues_MPISBAIJ",
1730                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1731   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1732                                      "MatRetrieveValues_MPISBAIJ",
1733                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1734   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1735                                      "MatGetDiagonalBlock_MPISBAIJ",
1736                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1737   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1738                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1739                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1740   PetscFunctionReturn(0);
1741 }
1742 EXTERN_C_END
1743 
1744 #undef __FUNCT__
1745 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1746 /*@C
1747    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1748    the user should preallocate the matrix storage by setting the parameters
1749    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1750    performance can be increased by more than a factor of 50.
1751 
1752    Collective on Mat
1753 
1754    Input Parameters:
1755 +  A - the matrix
1756 .  bs   - size of blockk
1757 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1758            submatrix  (same for all local rows)
1759 .  d_nnz - array containing the number of block nonzeros in the various block rows
1760            in the upper triangular and diagonal part of the in diagonal portion of the local
1761            (possibly different for each block row) or PETSC_NULL.  You must leave room
1762            for the diagonal entry even if it is zero.
1763 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1764            submatrix (same for all local rows).
1765 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1766            off-diagonal portion of the local submatrix (possibly different for
1767            each block row) or PETSC_NULL.
1768 
1769 
1770    Options Database Keys:
1771 .   -mat_no_unroll - uses code that does not unroll the loops in the
1772                      block calculations (much slower)
1773 .   -mat_block_size - size of the blocks to use
1774 
1775    Notes:
1776 
1777    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1778    than it must be used on all processors that share the object for that argument.
1779 
1780    Storage Information:
1781    For a square global matrix we define each processor's diagonal portion
1782    to be its local rows and the corresponding columns (a square submatrix);
1783    each processor's off-diagonal portion encompasses the remainder of the
1784    local matrix (a rectangular submatrix).
1785 
1786    The user can specify preallocated storage for the diagonal part of
1787    the local submatrix with either d_nz or d_nnz (not both).  Set
1788    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1789    memory allocation.  Likewise, specify preallocated storage for the
1790    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1791 
1792    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1793    the figure below we depict these three local rows and all columns (0-11).
1794 
1795 .vb
1796            0 1 2 3 4 5 6 7 8 9 10 11
1797           -------------------
1798    row 3  |  o o o d d d o o o o o o
1799    row 4  |  o o o d d d o o o o o o
1800    row 5  |  o o o d d d o o o o o o
1801           -------------------
1802 .ve
1803 
1804    Thus, any entries in the d locations are stored in the d (diagonal)
1805    submatrix, and any entries in the o locations are stored in the
1806    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1807    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1808 
1809    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1810    plus the diagonal part of the d matrix,
1811    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1812    In general, for PDE problems in which most nonzeros are near the diagonal,
1813    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1814    or you will get TERRIBLE performance; see the users' manual chapter on
1815    matrices.
1816 
1817    Level: intermediate
1818 
1819 .keywords: matrix, block, aij, compressed row, sparse, parallel
1820 
1821 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1822 @*/
1823 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1824 {
1825   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1826 
1827   PetscFunctionBegin;
1828   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1829   if (f) {
1830     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1831   }
1832   PetscFunctionReturn(0);
1833 }
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatCreateMPISBAIJ"
1837 /*@C
1838    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1839    (block compressed row).  For good matrix assembly performance
1840    the user should preallocate the matrix storage by setting the parameters
1841    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1842    performance can be increased by more than a factor of 50.
1843 
1844    Collective on MPI_Comm
1845 
1846    Input Parameters:
1847 +  comm - MPI communicator
1848 .  bs   - size of blockk
1849 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1850            This value should be the same as the local size used in creating the
1851            y vector for the matrix-vector product y = Ax.
1852 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1853            This value should be the same as the local size used in creating the
1854            x vector for the matrix-vector product y = Ax.
1855 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1856 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1857 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1858            submatrix  (same for all local rows)
1859 .  d_nnz - array containing the number of block nonzeros in the various block rows
1860            in the upper triangular portion of the in diagonal portion of the local
1861            (possibly different for each block block row) or PETSC_NULL.
1862            You must leave room for the diagonal entry even if it is zero.
1863 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1864            submatrix (same for all local rows).
1865 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1866            off-diagonal portion of the local submatrix (possibly different for
1867            each block row) or PETSC_NULL.
1868 
1869    Output Parameter:
1870 .  A - the matrix
1871 
1872    Options Database Keys:
1873 .   -mat_no_unroll - uses code that does not unroll the loops in the
1874                      block calculations (much slower)
1875 .   -mat_block_size - size of the blocks to use
1876 .   -mat_mpi - use the parallel matrix data structures even on one processor
1877                (defaults to using SeqBAIJ format on one processor)
1878 
1879    Notes:
1880    The user MUST specify either the local or global matrix dimensions
1881    (possibly both).
1882 
1883    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1884    than it must be used on all processors that share the object for that argument.
1885 
1886    Storage Information:
1887    For a square global matrix we define each processor's diagonal portion
1888    to be its local rows and the corresponding columns (a square submatrix);
1889    each processor's off-diagonal portion encompasses the remainder of the
1890    local matrix (a rectangular submatrix).
1891 
1892    The user can specify preallocated storage for the diagonal part of
1893    the local submatrix with either d_nz or d_nnz (not both).  Set
1894    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1895    memory allocation.  Likewise, specify preallocated storage for the
1896    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1897 
1898    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1899    the figure below we depict these three local rows and all columns (0-11).
1900 
1901 .vb
1902            0 1 2 3 4 5 6 7 8 9 10 11
1903           -------------------
1904    row 3  |  o o o d d d o o o o o o
1905    row 4  |  o o o d d d o o o o o o
1906    row 5  |  o o o d d d o o o o o o
1907           -------------------
1908 .ve
1909 
1910    Thus, any entries in the d locations are stored in the d (diagonal)
1911    submatrix, and any entries in the o locations are stored in the
1912    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1913    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1914 
1915    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1916    plus the diagonal part of the d matrix,
1917    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1918    In general, for PDE problems in which most nonzeros are near the diagonal,
1919    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1920    or you will get TERRIBLE performance; see the users' manual chapter on
1921    matrices.
1922 
1923    Level: intermediate
1924 
1925 .keywords: matrix, block, aij, compressed row, sparse, parallel
1926 
1927 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1928 @*/
1929 
1930 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
1931 {
1932   int ierr,size;
1933 
1934   PetscFunctionBegin;
1935   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1936   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1937   if (size > 1) {
1938     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1939     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1940   } else {
1941     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1942     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1943   }
1944   PetscFunctionReturn(0);
1945 }
1946 
1947 
1948 #undef __FUNCT__
1949 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1950 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1951 {
1952   Mat          mat;
1953   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1954   int          ierr,len=0;
1955 
1956   PetscFunctionBegin;
1957   *newmat       = 0;
1958   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1959   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
1960   mat->preallocated = PETSC_TRUE;
1961   a = (Mat_MPISBAIJ*)mat->data;
1962   a->bs  = oldmat->bs;
1963   a->bs2 = oldmat->bs2;
1964   a->mbs = oldmat->mbs;
1965   a->nbs = oldmat->nbs;
1966   a->Mbs = oldmat->Mbs;
1967   a->Nbs = oldmat->Nbs;
1968 
1969   a->rstart       = oldmat->rstart;
1970   a->rend         = oldmat->rend;
1971   a->cstart       = oldmat->cstart;
1972   a->cend         = oldmat->cend;
1973   a->size         = oldmat->size;
1974   a->rank         = oldmat->rank;
1975   a->donotstash   = oldmat->donotstash;
1976   a->roworiented  = oldmat->roworiented;
1977   a->rowindices   = 0;
1978   a->rowvalues    = 0;
1979   a->getrowactive = PETSC_FALSE;
1980   a->barray       = 0;
1981   a->rstart_bs    = oldmat->rstart_bs;
1982   a->rend_bs      = oldmat->rend_bs;
1983   a->cstart_bs    = oldmat->cstart_bs;
1984   a->cend_bs      = oldmat->cend_bs;
1985 
1986   /* hash table stuff */
1987   a->ht           = 0;
1988   a->hd           = 0;
1989   a->ht_size      = 0;
1990   a->ht_flag      = oldmat->ht_flag;
1991   a->ht_fact      = oldmat->ht_fact;
1992   a->ht_total_ct  = 0;
1993   a->ht_insert_ct = 0;
1994 
1995   ierr = PetscMalloc(3*(a->size+2)*sizeof(int),&a->rowners);CHKERRQ(ierr);
1996   PetscLogObjectMemory(mat,3*(a->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1997   a->cowners    = a->rowners + a->size + 2;
1998   a->rowners_bs = a->cowners + a->size + 2;
1999   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2000   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2001   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2002   if (oldmat->colmap) {
2003 #if defined (PETSC_USE_CTABLE)
2004     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2005 #else
2006     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2007     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2008     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2009 #endif
2010   } else a->colmap = 0;
2011   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2012     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2013     PetscLogObjectMemory(mat,len*sizeof(int));
2014     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2015   } else a->garray = 0;
2016 
2017   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2018   PetscLogObjectParent(mat,a->lvec);
2019   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2020 
2021   PetscLogObjectParent(mat,a->Mvctx);
2022   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2023   PetscLogObjectParent(mat,a->A);
2024   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2025   PetscLogObjectParent(mat,a->B);
2026   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2027   *newmat = mat;
2028   PetscFunctionReturn(0);
2029 }
2030 
2031 #include "petscsys.h"
2032 
2033 EXTERN_C_BEGIN
2034 #undef __FUNCT__
2035 #define __FUNCT__ "MatLoad_MPISBAIJ"
2036 int MatLoad_MPISBAIJ(PetscViewer viewer,MatType type,Mat *newmat)
2037 {
2038   Mat          A;
2039   int          i,nz,ierr,j,rstart,rend,fd;
2040   PetscScalar  *vals,*buf;
2041   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2042   MPI_Status   status;
2043   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2044   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2045   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2046   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2047   int          dcount,kmax,k,nzcount,tmp;
2048 #if defined(PETSC_HAVE_MUMPS)
2049   PetscTruth   flag;
2050 #endif
2051 
2052   PetscFunctionBegin;
2053   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2054 
2055   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2056   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2057   if (!rank) {
2058     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2059     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2060     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2061     if (header[3] < 0) {
2062       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2063     }
2064   }
2065 
2066   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2067   M = header[1]; N = header[2];
2068 
2069   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2070 
2071   /*
2072      This code adds extra rows to make sure the number of rows is
2073      divisible by the blocksize
2074   */
2075   Mbs        = M/bs;
2076   extra_rows = bs - M + bs*(Mbs);
2077   if (extra_rows == bs) extra_rows = 0;
2078   else                  Mbs++;
2079   if (extra_rows &&!rank) {
2080     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2081   }
2082 
2083   /* determine ownership of all rows */
2084   mbs        = Mbs/size + ((Mbs % size) > rank);
2085   m          = mbs*bs;
2086   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2087   browners   = rowners + size + 1;
2088   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2089   rowners[0] = 0;
2090   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2091   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2092   rstart = rowners[rank];
2093   rend   = rowners[rank+1];
2094 
2095   /* distribute row lengths to all processors */
2096   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2097   if (!rank) {
2098     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2099     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2100     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2101     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2102     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2103     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2104     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2105   } else {
2106     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2107   }
2108 
2109   if (!rank) {   /* procs[0] */
2110     /* calculate the number of nonzeros on each processor */
2111     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2112     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2113     for (i=0; i<size; i++) {
2114       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2115         procsnz[i] += rowlengths[j];
2116       }
2117     }
2118     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2119 
2120     /* determine max buffer needed and allocate it */
2121     maxnz = 0;
2122     for (i=0; i<size; i++) {
2123       maxnz = PetscMax(maxnz,procsnz[i]);
2124     }
2125     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2126 
2127     /* read in my part of the matrix column indices  */
2128     nz     = procsnz[0];
2129     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2130     mycols = ibuf;
2131     if (size == 1)  nz -= extra_rows;
2132     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2133     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2134 
2135     /* read in every ones (except the last) and ship off */
2136     for (i=1; i<size-1; i++) {
2137       nz   = procsnz[i];
2138       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2139       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2140     }
2141     /* read in the stuff for the last proc */
2142     if (size != 1) {
2143       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2144       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2145       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2146       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2147     }
2148     ierr = PetscFree(cols);CHKERRQ(ierr);
2149   } else {  /* procs[i], i>0 */
2150     /* determine buffer space needed for message */
2151     nz = 0;
2152     for (i=0; i<m; i++) {
2153       nz += locrowlens[i];
2154     }
2155     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2156     mycols = ibuf;
2157     /* receive message of column indices*/
2158     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2159     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2160     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2161   }
2162 
2163   /* loop over local rows, determining number of off diagonal entries */
2164   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2165   odlens   = dlens + (rend-rstart);
2166   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2167   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2168   masked1  = mask    + Mbs;
2169   masked2  = masked1 + Mbs;
2170   rowcount = 0; nzcount = 0;
2171   for (i=0; i<mbs; i++) {
2172     dcount  = 0;
2173     odcount = 0;
2174     for (j=0; j<bs; j++) {
2175       kmax = locrowlens[rowcount];
2176       for (k=0; k<kmax; k++) {
2177         tmp = mycols[nzcount++]/bs; /* block col. index */
2178         if (!mask[tmp]) {
2179           mask[tmp] = 1;
2180           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2181           else masked1[dcount++] = tmp; /* entry in diag portion */
2182         }
2183       }
2184       rowcount++;
2185     }
2186 
2187     dlens[i]  = dcount;  /* d_nzz[i] */
2188     odlens[i] = odcount; /* o_nzz[i] */
2189 
2190     /* zero out the mask elements we set */
2191     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2192     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2193   }
2194 
2195   /* create our matrix */
2196   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2197   ierr = MatSetType(A,type);CHKERRQ(ierr);
2198   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2199   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2200 
2201   if (!rank) {
2202     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2203     /* read in my part of the matrix numerical values  */
2204     nz = procsnz[0];
2205     vals = buf;
2206     mycols = ibuf;
2207     if (size == 1)  nz -= extra_rows;
2208     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2209     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2210 
2211     /* insert into matrix */
2212     jj      = rstart*bs;
2213     for (i=0; i<m; i++) {
2214       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2215       mycols += locrowlens[i];
2216       vals   += locrowlens[i];
2217       jj++;
2218     }
2219 
2220     /* read in other processors (except the last one) and ship out */
2221     for (i=1; i<size-1; i++) {
2222       nz   = procsnz[i];
2223       vals = buf;
2224       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2225       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2226     }
2227     /* the last proc */
2228     if (size != 1){
2229       nz   = procsnz[i] - extra_rows;
2230       vals = buf;
2231       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2232       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2233       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2234     }
2235     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2236 
2237   } else {
2238     /* receive numeric values */
2239     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2240 
2241     /* receive message of values*/
2242     vals   = buf;
2243     mycols = ibuf;
2244     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2245     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2246     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2247 
2248     /* insert into matrix */
2249     jj      = rstart*bs;
2250     for (i=0; i<m; i++) {
2251       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2252       mycols += locrowlens[i];
2253       vals   += locrowlens[i];
2254       jj++;
2255     }
2256   }
2257 
2258   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2259   ierr = PetscFree(buf);CHKERRQ(ierr);
2260   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2261   ierr = PetscFree(rowners);CHKERRQ(ierr);
2262   ierr = PetscFree(dlens);CHKERRQ(ierr);
2263   ierr = PetscFree(mask);CHKERRQ(ierr);
2264   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2265   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2266 #if defined(PETSC_HAVE_MUMPS)
2267   ierr = PetscOptionsHasName(A->prefix,"-mat_sbaij_mumps",&flag);CHKERRQ(ierr);
2268   if (flag) {
2269       ierr = MatUseMUMPS_MPIAIJ(A);CHKERRQ(ierr);
2270   }
2271 #endif
2272   *newmat = A;
2273   PetscFunctionReturn(0);
2274 }
2275 EXTERN_C_END
2276 
2277 #undef __FUNCT__
2278 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2279 /*@
2280    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2281 
2282    Input Parameters:
2283 .  mat  - the matrix
2284 .  fact - factor
2285 
2286    Collective on Mat
2287 
2288    Level: advanced
2289 
2290   Notes:
2291    This can also be set by the command line option: -mat_use_hash_table fact
2292 
2293 .keywords: matrix, hashtable, factor, HT
2294 
2295 .seealso: MatSetOption()
2296 @*/
2297 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2298 {
2299   PetscFunctionBegin;
2300   SETERRQ(1,"Function not yet written for SBAIJ format");
2301   /* PetscFunctionReturn(0); */
2302 }
2303 
2304 #undef __FUNCT__
2305 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2306 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2307 {
2308   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2309   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2310   PetscReal    atmp;
2311   PetscReal    *work,*svalues,*rvalues;
2312   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2313   int          rank,size,*rowners_bs,dest,count,source;
2314   PetscScalar  *va;
2315   MatScalar    *ba;
2316   MPI_Status   stat;
2317 
2318   PetscFunctionBegin;
2319   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2320   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2321 
2322   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2323   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2324 
2325   bs   = a->bs;
2326   mbs  = a->mbs;
2327   Mbs  = a->Mbs;
2328   ba   = b->a;
2329   bi   = b->i;
2330   bj   = b->j;
2331   /*
2332   PetscSynchronizedPrintf(A->comm,"[%d] M: %d, bs: %d, mbs: %d \n",rank,bs*Mbs,bs,mbs);
2333   PetscSynchronizedFlush(A->comm);
2334   */
2335 
2336   /* find ownerships */
2337   rowners_bs = a->rowners_bs;
2338   /*
2339   if (!rank){
2340     for (i=0; i<size+1; i++) PetscPrintf(PETSC_COMM_SELF," rowners_bs[%d]: %d\n",i,rowners_bs[i]);
2341   }
2342   */
2343 
2344   /* each proc creates an array to be distributed */
2345   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2346   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2347 
2348   /* row_max for B */
2349   if (rank != size-1){
2350     for (i=0; i<mbs; i++) {
2351       ncols = bi[1] - bi[0]; bi++;
2352       brow  = bs*i;
2353       for (j=0; j<ncols; j++){
2354         bcol = bs*(*bj);
2355         for (kcol=0; kcol<bs; kcol++){
2356           col = bcol + kcol;                 /* local col index */
2357           col += rowners_bs[rank+1];      /* global col index */
2358           /* PetscPrintf(PETSC_COMM_SELF,"[%d], col: %d\n",rank,col); */
2359           for (krow=0; krow<bs; krow++){
2360             atmp = PetscAbsScalar(*ba); ba++;
2361             row = brow + krow;    /* local row index */
2362             /* printf("val[%d,%d]: %g\n",row,col,atmp); */
2363             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2364             if (work[col] < atmp) work[col] = atmp;
2365           }
2366         }
2367         bj++;
2368       }
2369     }
2370     /*
2371       PetscPrintf(PETSC_COMM_SELF,"[%d], work: ",rank);
2372       for (i=0; i<bs*Mbs; i++) PetscPrintf(PETSC_COMM_SELF,"%g ",work[i]);
2373       PetscPrintf(PETSC_COMM_SELF,"[%d]: \n");
2374       */
2375 
2376     /* send values to its owners */
2377     for (dest=rank+1; dest<size; dest++){
2378       svalues = work + rowners_bs[dest];
2379       count   = rowners_bs[dest+1]-rowners_bs[dest];
2380       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2381       /*
2382       PetscSynchronizedPrintf(A->comm,"[%d] sends %d values to [%d]: %g, %g, %g, %g\n",rank,count,dest,svalues[0],svalues[1],svalues[2],svalues[3]);
2383       PetscSynchronizedFlush(A->comm);
2384       */
2385     }
2386   }
2387 
2388   /* receive values */
2389   if (rank){
2390     rvalues = work;
2391     count   = rowners_bs[rank+1]-rowners_bs[rank];
2392     for (source=0; source<rank; source++){
2393       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2394       /* process values */
2395       for (i=0; i<count; i++){
2396         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2397       }
2398       /*
2399       PetscSynchronizedPrintf(A->comm,"[%d] received %d values from [%d]: %g, %g, %g, %g \n",rank,count,stat.MPI_SOURCE,rvalues[0],rvalues[1],rvalues[2],rvalues[3]);
2400       PetscSynchronizedFlush(A->comm);
2401       */
2402     }
2403   }
2404 
2405   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2406   ierr = PetscFree(work);CHKERRQ(ierr);
2407   PetscFunctionReturn(0);
2408 }
2409 
2410 #undef __FUNCT__
2411 #define __FUNCT__ "MatRelax_MPISBAIJ"
2412 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2413 {
2414   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2415   int            ierr,mbs=mat->mbs,bs=mat->bs;
2416   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2417   Vec            bb1;
2418 
2419   PetscFunctionBegin;
2420   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2421   if (bs > 1)
2422     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2423 
2424   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2425     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2426       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2427       its--;
2428     }
2429 
2430     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2431     while (its--){
2432 
2433       /* lower triangular part: slvec0b = - B^T*xx */
2434       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2435 
2436       /* copy xx into slvec0a */
2437       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2438       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2439       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2440       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2441 
2442       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2443 
2444       /* copy bb into slvec1a */
2445       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2446       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2447       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2448       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2449 
2450       /* set slvec1b = 0 */
2451       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2452 
2453       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2454       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2455       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2456       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2457 
2458       /* upper triangular part: bb1 = bb1 - B*x */
2459       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2460 
2461       /* local diagonal sweep */
2462       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2463     }
2464     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2465   } else {
2466     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2467   }
2468   PetscFunctionReturn(0);
2469 }
2470 
2471 #undef __FUNCT__
2472 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2473 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2474 {
2475   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2476   int            ierr;
2477   PetscScalar    mone=-1.0;
2478   Vec            lvec1,bb1;
2479 
2480   PetscFunctionBegin;
2481   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2482   if (mat->bs > 1)
2483     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2484 
2485   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2486     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2487       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2488       its--;
2489     }
2490 
2491     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2492     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2493     while (its--){
2494       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2495 
2496       /* lower diagonal part: bb1 = bb - B^T*xx */
2497       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2498       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2499 
2500       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2501       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2502       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2503 
2504       /* upper diagonal part: bb1 = bb1 - B*x */
2505       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2506       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2507 
2508       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2509 
2510       /* diagonal sweep */
2511       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2512     }
2513     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2514     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2515   } else {
2516     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2517   }
2518   PetscFunctionReturn(0);
2519 }
2520 
2521