xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4482741e5b2e2bbc854fb1f8dba65221386520f2)
1 /*$Id: mpisbaij.c,v 1.61 2001/08/10 03:31:37 bsmith Exp $*/
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 extern int MatSetUpMultiply_MPISBAIJ(Mat);
8 extern int MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 extern int DisAssemble_MPISBAIJ(Mat);
10 extern int MatIncreaseOverlap_MPISBAIJ(Mat,int,IS[],int);
11 extern int MatGetValues_SeqSBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
12 extern int MatGetValues_SeqBAIJ(Mat,int,const int[],int,const int[],PetscScalar []);
13 extern int MatSetValues_SeqSBAIJ(Mat,int,const int [],int,const int [],const PetscScalar [],InsertMode);
14 extern int MatSetValuesBlocked_SeqSBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
15 extern int MatSetValuesBlocked_SeqBAIJ(Mat,int,const int[],int,const int[],const PetscScalar[],InsertMode);
16 extern int MatGetRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
17 extern int MatRestoreRow_SeqSBAIJ(Mat,int,int*,int**,PetscScalar**);
18 extern int MatPrintHelp_SeqSBAIJ(Mat);
19 extern int MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 extern int MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 extern int MatGetRowMax_MPISBAIJ(Mat,Vec);
22 extern int MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,int,int,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist as a separate routine. Otherwise ..._MatScalar() takes matrix elements already in
27    single precision (MatScalar) and inserts them into the single precision data structure.
28    The public function MatSetValuesBlocked_MPISBAIJ() converts the incoming PetscScalar
29    entries into single precision and then calls ..._MatScalar() to store them.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 extern int MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
33 extern int MatSetValues_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
34 extern int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
35 extern int MatSetValues_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
36 extern int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,int,const int[],int,const int[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 int MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
51   int          ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 int MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ *aij = (Mat_MPISBAIJ *)mat->data;
66   int          ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
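/*
   Added note: MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) inserts one scalar into
   the local "diagonal" block A.  It locates block row brow = row/bs, binary-searches the
   sorted block column indices rp[] for bcol = col/bs, and either updates the existing
   bs x bs block in place or shifts the tail of the row up one block to make room; when the
   row has no free slot the whole storage is reallocated with CHUNKSIZE extra blocks.
   MatSetValues_SeqSBAIJ_B_Private() below does the same for the off-diagonal block B.
*/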
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
103       if (nrow >= rmax) { \
104         /* there is no extra room in row, therefore enlarge */ \
105         int       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
106         MatScalar *new_a; \
107  \
108         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
109  \
110         /* malloc new storage space */ \
111         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(int); \
112         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
113         new_j = (int*)(new_a + bs2*new_nz); \
114         new_i = new_j + new_nz; \
115  \
116         /* copy over old data into new slots */ \
117         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
118         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
119         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
120         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
121         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
122         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
124         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
125                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
126         /* free up old matrix storage */ \
127         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
128         if (!a->singlemalloc) { \
129           ierr = PetscFree(a->i);CHKERRQ(ierr); \
130           ierr = PetscFree(a->j);CHKERRQ(ierr);\
131         } \
132         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
133         a->singlemalloc = PETSC_TRUE; \
134  \
135         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
136         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
137         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
138         a->maxnz += bs2*CHUNKSIZE; \
139         a->reallocs++; \
140         a->nz++; \
141       } \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       a_noinsert:; \
152     ailen[brow] = nrow; \
153 }
154 #ifndef MatSetValues_SeqSBAIJ_B_Private
155 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
156 { \
157     brow = row/bs;  \
158     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
159     rmax = bimax[brow]; nrow = bilen[brow]; \
160       bcol = col/bs; \
161       ridx = row % bs; cidx = col % bs; \
162       low = 0; high = nrow; \
163       while (high-low > 3) { \
164         t = (low+high)/2; \
165         if (rp[t] > bcol) high = t; \
166         else              low  = t; \
167       } \
168       for (_i=low; _i<high; _i++) { \
169         if (rp[_i] > bcol) break; \
170         if (rp[_i] == bcol) { \
171           bap  = ap +  bs2*_i + bs*cidx + ridx; \
172           if (addv == ADD_VALUES) *bap += value;  \
173           else                    *bap  = value;  \
174           goto b_noinsert; \
175         } \
176       } \
177       if (b->nonew == 1) goto b_noinsert; \
178       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) into matrix", row, col); \
179       if (nrow >= rmax) { \
180         /* there is no extra room in row, therefore enlarge */ \
181         int       new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
182         MatScalar *new_a; \
183  \
184         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%d, %d) in the matrix", row, col); \
185  \
186         /* malloc new storage space */ \
187         len   = new_nz*(sizeof(int)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(int); \
188         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
189         new_j = (int*)(new_a + bs2*new_nz); \
190         new_i = new_j + new_nz; \
191  \
192         /* copy over old data into new slots */ \
193         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
194         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
195         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(int));CHKERRQ(ierr); \
196         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
197         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(int));CHKERRQ(ierr); \
198         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
200         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
201                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
202         /* free up old matrix storage */ \
203         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
204         if (!b->singlemalloc) { \
205           ierr = PetscFree(b->i);CHKERRQ(ierr); \
206           ierr = PetscFree(b->j);CHKERRQ(ierr); \
207         } \
208         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
209         b->singlemalloc = PETSC_TRUE; \
210  \
211         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
212         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
213         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(int) + bs2*sizeof(MatScalar))); \
214         b->maxnz += bs2*CHUNKSIZE; \
215         b->reallocs++; \
216         b->nz++; \
217       } \
218       N = nrow++ - 1;  \
219       /* shift up all the later entries in this row */ \
220       for (ii=N; ii>=_i; ii--) { \
221         rp[ii+1] = rp[ii]; \
222         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
223       } \
224       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
225       rp[_i]                      = bcol;  \
226       ap[bs2*_i + bs*cidx + ridx] = value;  \
227       b_noinsert:; \
228     bilen[brow] = nrow; \
229 }
230 #endif
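/*
   Added note: each process of a MATMPISBAIJ matrix stores the upper triangle of its
   diagonal portion in a sequential SBAIJ matrix baij->A, and its off-diagonal portion
   (block columns owned by other processes) in a sequential BAIJ matrix baij->B that is
   compressed to the nonzero block columns listed in baij->garray.
*/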
231 
232 #if defined(PETSC_USE_MAT_SINGLE)
233 #undef __FUNCT__
234 #define __FUNCT__ "MatSetValues_MPISBAIJ"
235 int MatSetValues_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
236 {
237   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
238   int          ierr,i,N = m*n;
239   MatScalar    *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 int MatSetValuesBlocked_MPISBAIJ(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
261   int         ierr,i,N = m*n*b->bs2;
262   MatScalar   *vsingle;
263 
264   PetscFunctionBegin;
265   if (N > b->setvalueslen) {
266     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
267     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
268     b->setvalueslen  = N;
269   }
270   vsingle = b->setvaluescopy;
271   for (i=0; i<N; i++) {
272     vsingle[i] = v[i];
273   }
274   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
275   PetscFunctionReturn(0);
276 }
277 
278 #undef __FUNCT__
279 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT"
280 int MatSetValues_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
281 {
282   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
283   int         ierr,i,N = m*n;
284   MatScalar   *vsingle;
285 
286   PetscFunctionBegin;
287   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
288   /* PetscFunctionReturn(0); */
289 }
290 
291 #undef __FUNCT__
292 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT"
293 int MatSetValuesBlocked_MPISBAIJ_HT(Mat mat,int m,const int im[],int n,const int in[],const PetscScalar v[],InsertMode addv)
294 {
295   Mat_MPISBAIJ *b = (Mat_MPISBAIJ*)mat->data;
296   int         ierr,i,N = m*n*b->bs2;
297   MatScalar   *vsingle;
298 
299   PetscFunctionBegin;
300   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
301   /* PetscFunctionReturn(0); */
302 }
303 #endif
304 
305 /* Only add/insert a(i,j) with i<=j (blocks).
306    Any a(i,j) with i>j input by the user is ignored.
307 */
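/*
   Illustrative usage (a sketch, not compiled here): since only the upper triangle is
   stored, callers normally set entries with i <= j; the second call below, whose row
   index exceeds its column index, is silently dropped by this routine.

       PetscScalar v = 2.0;
       int         i = 3, j = 5;
       ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);    kept    (i <= j)
       ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);    ignored (row > col)
*/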
308 #undef __FUNCT__
309 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
310 int MatSetValues_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
311 {
312   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
313   MatScalar    value;
314   PetscTruth   roworiented = baij->roworiented;
315   int          ierr,i,j,row,col;
316   int          rstart_orig=baij->rstart_bs;
317   int          rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
318   int          cend_orig=baij->cend_bs,bs=baij->bs;
319 
320   /* Some Variables required in the macro */
321   Mat          A = baij->A;
322   Mat_SeqSBAIJ *a = (Mat_SeqSBAIJ*)(A)->data;
323   int          *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
324   MatScalar    *aa=a->a;
325 
326   Mat          B = baij->B;
327   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(B)->data;
328   int          *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
329   MatScalar    *ba=b->a;
330 
331   int          *rp,ii,nrow,_i,rmax,N,brow,bcol;
332   int          low,high,t,ridx,cidx,bs2=a->bs2;
333   MatScalar    *ap,*bap;
334 
335   /* for stash */
336   int          n_loc, *in_loc=0;
337   MatScalar    *v_loc=0;
338 
339   PetscFunctionBegin;
340 
341   if(!baij->donotstash){
342     ierr = PetscMalloc(n*sizeof(int),&in_loc);CHKERRQ(ierr);
343     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
344   }
345 
346   for (i=0; i<m; i++) {
347     if (im[i] < 0) continue;
348 #if defined(PETSC_USE_BOPT_g)
349     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",im[i],mat->M-1);
350 #endif
351     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
352       row = im[i] - rstart_orig;              /* local row index */
353       for (j=0; j<n; j++) {
354         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
355         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
356           col = in[j] - cstart_orig;          /* local col index */
357           brow = row/bs; bcol = col/bs;
358           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
359           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
360           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
361           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
362         } else if (in[j] < 0) continue;
363 #if defined(PETSC_USE_BOPT_g)
364         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",in[j],mat->N-1);}
365 #endif
366         else {  /* off-diag entry (B) */
367           if (mat->was_assembled) {
368             if (!baij->colmap) {
369               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
370             }
371 #if defined (PETSC_USE_CTABLE)
372             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
373             col  = col - 1;
374 #else
375             col = baij->colmap[in[j]/bs] - 1;
376 #endif
377             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
378               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
379               col =  in[j];
380               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
381               B = baij->B;
382               b = (Mat_SeqBAIJ*)(B)->data;
383               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
384               ba=b->a;
385             } else col += in[j]%bs;
386           } else col = in[j];
387           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
388           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
389           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
390         }
391       }
392     } else {  /* off processor entry */
393       if (!baij->donotstash) {
394         n_loc = 0;
395         for (j=0; j<n; j++){
396           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
397           in_loc[n_loc] = in[j];
398           if (roworiented) {
399             v_loc[n_loc] = v[i*n+j];
400           } else {
401             v_loc[n_loc] = v[j*m+i];
402           }
403           n_loc++;
404         }
405         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
406       }
407     }
408   }
409 
410   if(!baij->donotstash){
411     ierr = PetscFree(in_loc);CHKERRQ(ierr);
412     ierr = PetscFree(v_loc);CHKERRQ(ierr);
413   }
414   PetscFunctionReturn(0);
415 }
416 
417 #undef __FUNCT__
418 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
419 int MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
420 {
421   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
422   const MatScalar *value;
423   MatScalar       *barray=baij->barray;
424   PetscTruth      roworiented = baij->roworiented;
425   int             ierr,i,j,ii,jj,row,col,rstart=baij->rstart;
426   int             rend=baij->rend,cstart=baij->cstart,stepval;
427   int             cend=baij->cend,bs=baij->bs,bs2=baij->bs2;
428 
429   PetscFunctionBegin;
430   if(!barray) {
431     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
432     baij->barray = barray;
433   }
434 
435   if (roworiented) {
436     stepval = (n-1)*bs;
437   } else {
438     stepval = (m-1)*bs;
439   }
440   for (i=0; i<m; i++) {
441     if (im[i] < 0) continue;
442 #if defined(PETSC_USE_BOPT_g)
443     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %d max %d",im[i],baij->Mbs-1);
444 #endif
445     if (im[i] >= rstart && im[i] < rend) {
446       row = im[i] - rstart;
447       for (j=0; j<n; j++) {
448         /* If n == 1 (row-oriented) or m == 1 (column-oriented) then a copy is not required */
449         if ((roworiented) && (n == 1)) {
450           barray = (MatScalar*) v + i*bs2;
451         } else if((!roworiented) && (m == 1)) {
452           barray = (MatScalar*) v + j*bs2;
453         } else { /* Here a copy is required */
454           if (roworiented) {
455             value = v + i*(stepval+bs)*bs + j*bs;
456           } else {
457             value = v + j*(stepval+bs)*bs + i*bs;
458           }
459           for (ii=0; ii<bs; ii++,value+=stepval) {
460             for (jj=0; jj<bs; jj++) {
461               *barray++  = *value++;
462             }
463           }
464           barray -=bs2;
465         }
466 
467         if (in[j] >= cstart && in[j] < cend){
468           col  = in[j] - cstart;
469           ierr = MatSetValuesBlocked_SeqSBAIJ_MatScalar(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
470         }
471         else if (in[j] < 0) continue;
472 #if defined(PETSC_USE_BOPT_g)
473         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %d max %d",in[j],baij->Nbs-1);}
474 #endif
475         else {
476           if (mat->was_assembled) {
477             if (!baij->colmap) {
478               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
479             }
480 
481 #if defined(PETSC_USE_BOPT_g)
482 #if defined (PETSC_USE_CTABLE)
483             { int data;
484               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
485               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
486             }
487 #else
488             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
489 #endif
490 #endif
491 #if defined (PETSC_USE_CTABLE)
492             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
493             col  = (col - 1)/bs;
494 #else
495             col = (baij->colmap[in[j]] - 1)/bs;
496 #endif
497             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
498               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
499               col =  in[j];
500             }
501           }
502           else col = in[j];
503           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
504         }
505       }
506     } else {
507       if (!baij->donotstash) {
508         if (roworiented) {
509           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
510         } else {
511           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
512         }
513       }
514     }
515   }
516   PetscFunctionReturn(0);
517 }
518 
519 #define HASH_KEY 0.6180339887
520 #define HASH(size,key,tmp) (tmp = (key)*HASH_KEY,(int)((size)*(tmp-(int)tmp)))
521 /* #define HASH(size,key) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
522 /* #define HASH(size,key,tmp) ((int)((size)*fmod(((key)*HASH_KEY),1))) */
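/*
   Added note: HASH() is multiplicative hashing.  HASH_KEY is the fractional part of the
   golden ratio, so the fractional part of key*HASH_KEY spreads consecutive keys nearly
   uniformly over [0,1); scaling by the table size yields an index in [0,size).
   A minimal sketch:

       PetscReal tmp;
       int       idx = HASH(1024,37,tmp);    yields 0 <= idx < 1024
*/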
523 #undef __FUNCT__
524 #define __FUNCT__ "MatSetValues_MPISBAIJ_HT_MatScalar"
525 int MatSetValues_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
526 {
527   PetscFunctionBegin;
528   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
529   /* PetscFunctionReturn(0); */
530 }
531 
532 #undef __FUNCT__
533 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_HT_MatScalar"
534 int MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat mat,int m,const int im[],int n,const int in[],const MatScalar v[],InsertMode addv)
535 {
536   PetscFunctionBegin;
537   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
538   /* PetscFunctionReturn(0); */
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatGetValues_MPISBAIJ"
543 int MatGetValues_MPISBAIJ(Mat mat,int m,const int idxm[],int n,const int idxn[],PetscScalar v[])
544 {
545   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
546   int          bs=baij->bs,ierr,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
547   int          bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
548 
549   PetscFunctionBegin;
550   for (i=0; i<m; i++) {
551     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %d",idxm[i]);
552     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %d max %d",idxm[i],mat->M-1);
553     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
554       row = idxm[i] - bsrstart;
555       for (j=0; j<n; j++) {
556         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %d",idxn[j]);
557         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %d max %d",idxn[j],mat->N-1);
558         if (idxn[j] >= bscstart && idxn[j] < bscend){
559           col = idxn[j] - bscstart;
560           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
561         } else {
562           if (!baij->colmap) {
563             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
564           }
565 #if defined (PETSC_USE_CTABLE)
566           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
567           data--;
568 #else
569           data = baij->colmap[idxn[j]/bs]-1;
570 #endif
571           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
572           else {
573             col  = data + idxn[j]%bs;
574             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
575           }
576         }
577       }
578     } else {
579       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
580     }
581   }
582   PetscFunctionReturn(0);
583 }
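/*
   Added note: baij->colmap maps a global block column either to 0 (the block column does
   not occur in B) or to bs*(local block column)+1; baij->garray is the inverse map, giving
   the global block column of each local block column of B.  A failed lookup above thus
   means the requested entry is a structural zero, so 0.0 is returned.
*/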
584 
585 #undef __FUNCT__
586 #define __FUNCT__ "MatNorm_MPISBAIJ"
587 int MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
588 {
589   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
590   /* Mat_SeqSBAIJ *amat = (Mat_SeqSBAIJ*)baij->A->data; */
591   /* Mat_SeqBAIJ  *bmat = (Mat_SeqBAIJ*)baij->B->data; */
592   int        ierr;
593   PetscReal  sum[2],*lnorm2;
594 
595   PetscFunctionBegin;
596   if (baij->size == 1) {
597     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
598   } else {
599     if (type == NORM_FROBENIUS) {
600       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
601       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
602       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
603       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
604       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
605       /*
606       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
607       PetscSynchronizedPrintf(mat->comm,"[%d], lnorm2=%g, %g\n",rank,lnorm2[0],lnorm2[1]);
608       */
609       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
610       /*
611       PetscSynchronizedPrintf(mat->comm,"[%d], sum=%g, %g\n",rank,sum[0],sum[1]);
612       PetscSynchronizedFlush(mat->comm); */
613 
614       *norm = sqrt(sum[0] + 2*sum[1]); /* B is counted twice: stored once but it appears in both triangles of the symmetric matrix */
615       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
616     } else {
617       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
618     }
619   }
620   PetscFunctionReturn(0);
621 }
622 
623 /*
624   Creates the hash table and fills it.
625   This table is created only once.
626   If new entries need to be added to the matrix
627   then the hash table has to be destroyed and
628   recreated.
629 */
630 #undef __FUNCT__
631 #define __FUNCT__ "MatCreateHashTable_MPISBAIJ_Private"
632 int MatCreateHashTable_MPISBAIJ_Private(Mat mat,PetscReal factor)
633 {
634   PetscFunctionBegin;
635   SETERRQ(PETSC_ERR_SUP,"Function not yet written for SBAIJ format");
636   /* PetscFunctionReturn(0); */
637 }
638 
639 #undef __FUNCT__
640 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
641 int MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
642 {
643   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
644   int         ierr,nstash,reallocs;
645   InsertMode  addv;
646 
647   PetscFunctionBegin;
648   if (baij->donotstash) {
649     PetscFunctionReturn(0);
650   }
651 
652   /* make sure all processors are either in INSERTMODE or ADDMODE */
653   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
654   if (addv == (ADD_VALUES|INSERT_VALUES)) {
655     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
656   }
657   mat->insertmode = addv; /* in case this processor had no cache */
658 
659   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
660   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
661   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
662   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %d entries,uses %d mallocs.\n",nstash,reallocs);
663   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
664   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %d entries, uses %d mallocs.\n",nstash,reallocs);
665   PetscFunctionReturn(0);
666 }
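/*
   Typical assembly sequence (a sketch of the calling pattern, not part of this file):
   entries destined for other processes are collected in mat->stash / mat->bstash by
   MatSetValues(), shipped here, and drained in MatAssemblyEnd_MPISBAIJ() below.

       ierr = MatSetValues(mat,m,im,n,in,v,ADD_VALUES);CHKERRQ(ierr);   any rows, any process
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/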
667 
668 #undef __FUNCT__
669 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
670 int MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
671 {
672   Mat_MPISBAIJ *baij=(Mat_MPISBAIJ*)mat->data;
673   Mat_SeqSBAIJ  *a=(Mat_SeqSBAIJ*)baij->A->data;
674   Mat_SeqBAIJ  *b=(Mat_SeqBAIJ*)baij->B->data;
675   int         i,j,rstart,ncols,n,ierr,flg,bs2=baij->bs2;
676   int         *row,*col,other_disassembled;
677   PetscTruth  r1,r2,r3;
678   MatScalar   *val;
679   InsertMode  addv = mat->insertmode;
680 
681   PetscFunctionBegin;
682 
683   if (!baij->donotstash) {
684     while (1) {
685       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
686       /*
687       PetscSynchronizedPrintf(mat->comm,"[%d]: in AssemblyEnd, stash, flg=%d\n",rank,flg);
688       PetscSynchronizedFlush(mat->comm);
689       */
690       if (!flg) break;
691 
692       for (i=0; i<n;) {
693         /* Now identify the consecutive vals belonging to the same row */
694         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
695         if (j < n) ncols = j-i;
696         else       ncols = n-i;
697         /* Now assemble all these values with a single function call */
698         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
699         i = j;
700       }
701     }
702     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
703     /* Now process the block-stash. Since the values are stashed column-oriented,
704        temporarily clear the roworiented flags, and after MatSetValues()
705        restore the original flags */
706     r1 = baij->roworiented;
707     r2 = a->roworiented;
708     r3 = b->roworiented;
709     baij->roworiented = PETSC_FALSE;
710     a->roworiented    = PETSC_FALSE;
711     b->roworiented    = PETSC_FALSE;
712     while (1) {
713       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
714       if (!flg) break;
715 
716       for (i=0; i<n;) {
717         /* Now identify the consecutive vals belonging to the same row */
718         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
719         if (j < n) ncols = j-i;
720         else       ncols = n-i;
721         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
722         i = j;
723       }
724     }
725     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
726     baij->roworiented = r1;
727     a->roworiented    = r2;
728     b->roworiented    = r3;
729   }
730 
731   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
732   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
733 
734   /* determine if any processor has disassembled, if so we must
735      also disassemble ourselves, in order that we may reassemble. */
736   /*
737      if nonzero structure of submatrix B cannot change then we know that
738      no processor disassembled thus we can skip this stuff
739   */
740   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
741     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
742     if (mat->was_assembled && !other_disassembled) {
743       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
744     }
745   }
746 
747   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
748     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
749   }
750   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
751   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
752 
753 #if defined(PETSC_USE_BOPT_g)
754   if (baij->ht && mode== MAT_FINAL_ASSEMBLY) {
755     PetscLogInfo(0,"MatAssemblyEnd_MPISBAIJ:Average Hash Table Search in MatSetValues = %5.2f\n",((PetscReal)baij->ht_total_ct)/baij->ht_insert_ct);
756     baij->ht_total_ct  = 0;
757     baij->ht_insert_ct = 0;
758   }
759 #endif
760   if (baij->ht_flag && !baij->ht && mode == MAT_FINAL_ASSEMBLY) {
761     ierr = MatCreateHashTable_MPISBAIJ_Private(mat,baij->ht_fact);CHKERRQ(ierr);
762     mat->ops->setvalues        = MatSetValues_MPISBAIJ_HT;
763     mat->ops->setvaluesblocked = MatSetValuesBlocked_MPISBAIJ_HT;
764   }
765 
766   if (baij->rowvalues) {
767     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
768     baij->rowvalues = 0;
769   }
770 
771   PetscFunctionReturn(0);
772 }
773 
774 #undef __FUNCT__
775 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
776 static int MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
777 {
778   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
779   int               ierr,bs = baij->bs,size = baij->size,rank = baij->rank;
780   PetscTruth        isascii,isdraw;
781   PetscViewer       sviewer;
782   PetscViewerFormat format;
783 
784   PetscFunctionBegin;
785   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
786   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
787   if (isascii) {
788     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
789     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
790       MatInfo info;
791       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
792       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
793       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %d nz %d nz alloced %d bs %d mem %d\n",
794               rank,mat->m,(int)info.nz_used*bs,(int)info.nz_allocated*bs,
795               baij->bs,(int)info.memory);CHKERRQ(ierr);
796       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
797       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
798       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
799       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %d \n",rank,(int)info.nz_used*bs);CHKERRQ(ierr);
800       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
801       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
802       PetscFunctionReturn(0);
803     } else if (format == PETSC_VIEWER_ASCII_INFO) {
804       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %d\n",bs);CHKERRQ(ierr);
805       PetscFunctionReturn(0);
806     }
807   }
808 
809   if (isdraw) {
810     PetscDraw       draw;
811     PetscTruth isnull;
812     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
813     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
814   }
815 
816   if (size == 1) {
817     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
818     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
819   } else {
820     /* assemble the entire matrix onto first processor. */
821     Mat         A;
822     Mat_SeqSBAIJ *Aloc;
823     Mat_SeqBAIJ *Bloc;
824     int         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
825     MatScalar   *a;
826 
827     if (!rank) {
828       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,M,N,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
829     } else {
830       ierr = MatCreateMPISBAIJ(mat->comm,baij->bs,0,0,M,N,0,PETSC_NULL,0,PETSC_NULL,&A);CHKERRQ(ierr);
831     }
832     PetscLogObjectParent(mat,A);
833 
834     /* copy over the A part */
835     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
836     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
837     ierr  = PetscMalloc(bs*sizeof(int),&rvals);CHKERRQ(ierr);
838 
839     for (i=0; i<mbs; i++) {
840       rvals[0] = bs*(baij->rstart + i);
841       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
842       for (j=ai[i]; j<ai[i+1]; j++) {
843         col = (baij->cstart+aj[j])*bs;
844         for (k=0; k<bs; k++) {
845           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
846           col++; a += bs;
847         }
848       }
849     }
850     /* copy over the B part */
851     Bloc = (Mat_SeqBAIJ*)baij->B->data;
852     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
853     for (i=0; i<mbs; i++) {
854       rvals[0] = bs*(baij->rstart + i);
855       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
856       for (j=ai[i]; j<ai[i+1]; j++) {
857         col = baij->garray[aj[j]]*bs;
858         for (k=0; k<bs; k++) {
859           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
860           col++; a += bs;
861         }
862       }
863     }
864     ierr = PetscFree(rvals);CHKERRQ(ierr);
865     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
866     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
867     /*
868        Everyone has to call MatView() to draw the matrix since the graphics waits are
869        synchronized across all processors that share the PetscDraw object
870     */
871     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
872     if (!rank) {
873       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
874       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
875     }
876     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
877     ierr = MatDestroy(A);CHKERRQ(ierr);
878   }
879   PetscFunctionReturn(0);
880 }
881 
882 #undef __FUNCT__
883 #define __FUNCT__ "MatView_MPISBAIJ"
884 int MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
885 {
886   int        ierr;
887   PetscTruth isascii,isdraw,issocket,isbinary;
888 
889   PetscFunctionBegin;
890   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&isascii);CHKERRQ(ierr);
891   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
892   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
893   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
894   if (isascii || isdraw || issocket || isbinary) {
895     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
896   } else {
897     SETERRQ1(1,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
898   }
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatDestroy_MPISBAIJ"
904 int MatDestroy_MPISBAIJ(Mat mat)
905 {
906   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
907   int         ierr;
908 
909   PetscFunctionBegin;
910 #if defined(PETSC_USE_LOG)
911   PetscLogObjectState((PetscObject)mat,"Rows=%d,Cols=%d",mat->M,mat->N);
912 #endif
913   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
914   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
915   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
916   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
917   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
918 #if defined (PETSC_USE_CTABLE)
919   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
920 #else
921   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
922 #endif
923   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
924   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
925   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
926   if (baij->slvec0) {
927     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
928     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
929   }
930   if (baij->slvec1) {
931     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
932     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
933     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
934   }
935   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
936   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
937   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
938   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
939 #if defined(PETSC_USE_MAT_SINGLE)
940   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
941 #endif
942   ierr = PetscFree(baij);CHKERRQ(ierr);
943   PetscFunctionReturn(0);
944 }
945 
946 #undef __FUNCT__
947 #define __FUNCT__ "MatMult_MPISBAIJ"
948 int MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
949 {
950   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
951   int         ierr,nt,mbs=a->mbs,bs=a->bs;
952   PetscScalar *x,*from,zero=0.0;
953 
954   PetscFunctionBegin;
955   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
956   if (nt != A->n) {
957     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
958   }
959   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
960   if (nt != A->m) {
961     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
962   }
963 
964   /* diagonal part */
965   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
966   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
967 
968   /* subdiagonal part */
969   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
970 
971   /* copy x into the vec slvec0 */
972   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
973   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
974   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
975   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
976 
977   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
978   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
979   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
980 
981   /* superdiagonal part */
982   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
983 
984   PetscFunctionReturn(0);
985 }
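/*
   Added note on the algorithm above: with only the upper triangle stored, y = A*x is
   assembled as y = D*x + B*xg + (scattered B^T*x), where D = a->A is the diagonal block
   and B = a->B the off-diagonal block.  slvec0 packs [x ; B^T*x], and the single forward
   scatter with ADD_VALUES simultaneously gathers the needed ghost values of x into
   slvec1b and accumulates the B^T*x contributions into the owning processes' slvec1a,
   so one communication round suffices instead of the two used by the _2comm variant below.
*/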
986 
987 #undef __FUNCT__
988 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
989 int MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
990 {
991   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
992   int         ierr,nt;
993 
994   PetscFunctionBegin;
995   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
996   if (nt != A->n) {
997     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
998   }
999   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
1000   if (nt != A->m) {
1001     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
1002   }
1003 
1004   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1005   /* do diagonal part */
1006   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
1007   /* do superdiagonal part */
1008   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1009   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
1010   /* do subdiagonal part */
1011   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1012   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1013   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1014 
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
1020 int MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1021 {
1022   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1023   int          ierr,mbs=a->mbs,bs=a->bs;
1024   PetscScalar  *x,*from,zero=0.0;
1025 
1026   PetscFunctionBegin;
1027   /*
1028   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1029   PetscSynchronizedFlush(A->comm);
1030   */
1031   /* diagonal part */
1032   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1033   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1034 
1035   /* subdiagonal part */
1036   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1037 
1038   /* copy x into the vec slvec0 */
1039   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1040   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1041   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
1042   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1043 
1044   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1045   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1046   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1047 
1048   /* superdiagonal part */
1049   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1050 
1051   PetscFunctionReturn(0);
1052 }
1053 
1054 #undef __FUNCT__
1055 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1056 int MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1057 {
1058   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1059   int        ierr;
1060 
1061   PetscFunctionBegin;
1062   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1063   /* do diagonal part */
1064   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1065   /* do superdiagonal part */
1066   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1067   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1068 
1069   /* do subdiagonal part */
1070   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1071   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1072   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1073 
1074   PetscFunctionReturn(0);
1075 }
1076 
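/* Added note: the matrix is symmetric, so A^T = A and the transpose operations below
   simply forward to MatMult() and MatMultAdd(). */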
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1079 int MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1080 {
1081   int ierr;
1082 
1083   PetscFunctionBegin;
1084   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1085   PetscFunctionReturn(0);
1086 }
1087 
1088 #undef __FUNCT__
1089 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1090 int MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1091 {
1092   int ierr;
1093 
1094   PetscFunctionBegin;
1095   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1096   PetscFunctionReturn(0);
1097 }
1098 
1099 /*
1100   This only works correctly for square matrices where the subblock A->A is the
1101    diagonal block
1102 */
1103 #undef __FUNCT__
1104 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1105 int MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1106 {
1107   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1108   int         ierr;
1109 
1110   PetscFunctionBegin;
1111   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1112   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1113   PetscFunctionReturn(0);
1114 }
1115 
1116 #undef __FUNCT__
1117 #define __FUNCT__ "MatScale_MPISBAIJ"
1118 int MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1119 {
1120   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1121   int         ierr;
1122 
1123   PetscFunctionBegin;
1124   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1125   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1126   PetscFunctionReturn(0);
1127 }
1128 
1129 #undef __FUNCT__
1130 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1131 int MatGetRow_MPISBAIJ(Mat matin,int row,int *nz,int **idx,PetscScalar **v)
1132 {
1133   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1134   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1135   int            bs = mat->bs,bs2 = mat->bs2,i,ierr,*cworkA,*cworkB,**pcA,**pcB;
1136   int            nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1137   int            *cmap,*idx_p,cstart = mat->cstart;
1138 
1139   PetscFunctionBegin;
1140   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1141   mat->getrowactive = PETSC_TRUE;
1142 
1143   if (!mat->rowvalues && (idx || v)) {
1144     /*
1145         allocate enough space to hold information from the longest row.
1146     */
1147     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1148     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1149     int     max = 1,mbs = mat->mbs,tmp;
1150     for (i=0; i<mbs; i++) {
1151       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1152       if (max < tmp) { max = tmp; }
1153     }
1154     ierr = PetscMalloc(max*bs2*(sizeof(int)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1155     mat->rowindices = (int*)(mat->rowvalues + max*bs2);
1156   }
1157 
1158   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1159   lrow = row - brstart;  /* local row index */
1160 
1161   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1162   if (!v)   {pvA = 0; pvB = 0;}
1163   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1164   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1165   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1166   nztot = nzA + nzB;
1167 
1168   cmap  = mat->garray;
1169   if (v  || idx) {
1170     if (nztot) {
1171       /* Sort by increasing column numbers, assuming A and B already sorted */
1172       int imark = -1;
1173       if (v) {
1174         *v = v_p = mat->rowvalues;
1175         for (i=0; i<nzB; i++) {
1176           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1177           else break;
1178         }
1179         imark = i;
1180         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1181         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1182       }
1183       if (idx) {
1184         *idx = idx_p = mat->rowindices;
1185         if (imark > -1) {
1186           for (i=0; i<imark; i++) {
1187             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1188           }
1189         } else {
1190           for (i=0; i<nzB; i++) {
1191             if (cmap[cworkB[i]/bs] < cstart)
1192               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1193             else break;
1194           }
1195           imark = i;
1196         }
1197         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1198         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1199       }
1200     } else {
1201       if (idx) *idx = 0;
1202       if (v)   *v   = 0;
1203     }
1204   }
1205   *nz = nztot;
1206   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1207   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1208   PetscFunctionReturn(0);
1209 }
1210 
1211 #undef __FUNCT__
1212 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1213 int MatRestoreRow_MPISBAIJ(Mat mat,int row,int *nz,int **idx,PetscScalar **v)
1214 {
1215   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1216 
1217   PetscFunctionBegin;
1218   if (baij->getrowactive == PETSC_FALSE) {
1219     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow not called");
1220   }
1221   baij->getrowactive = PETSC_FALSE;
1222   PetscFunctionReturn(0);
1223 }
1224 
1225 #undef __FUNCT__
1226 #define __FUNCT__ "MatGetBlockSize_MPISBAIJ"
1227 int MatGetBlockSize_MPISBAIJ(Mat mat,int *bs)
1228 {
1229   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1230 
1231   PetscFunctionBegin;
1232   *bs = baij->bs;
1233   PetscFunctionReturn(0);
1234 }
1235 
1236 #undef __FUNCT__
1237 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1238 int MatZeroEntries_MPISBAIJ(Mat A)
1239 {
1240   Mat_MPISBAIJ *l = (Mat_MPISBAIJ*)A->data;
1241   int         ierr;
1242 
1243   PetscFunctionBegin;
1244   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1245   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1246   PetscFunctionReturn(0);
1247 }
1248 
1249 #undef __FUNCT__
1250 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1251 int MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1252 {
1253   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)matin->data;
1254   Mat         A = a->A,B = a->B;
1255   int         ierr;
1256   PetscReal   isend[5],irecv[5];
1257 
1258   PetscFunctionBegin;
1259   info->block_size     = (PetscReal)a->bs;
1260   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1261   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1262   isend[3] = info->memory;  isend[4] = info->mallocs;
1263   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1264   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1265   isend[3] += info->memory;  isend[4] += info->mallocs;
1266   if (flag == MAT_LOCAL) {
1267     info->nz_used      = isend[0];
1268     info->nz_allocated = isend[1];
1269     info->nz_unneeded  = isend[2];
1270     info->memory       = isend[3];
1271     info->mallocs      = isend[4];
1272   } else if (flag == MAT_GLOBAL_MAX) {
1273     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1274     info->nz_used      = irecv[0];
1275     info->nz_allocated = irecv[1];
1276     info->nz_unneeded  = irecv[2];
1277     info->memory       = irecv[3];
1278     info->mallocs      = irecv[4];
1279   } else if (flag == MAT_GLOBAL_SUM) {
1280     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1281     info->nz_used      = irecv[0];
1282     info->nz_allocated = irecv[1];
1283     info->nz_unneeded  = irecv[2];
1284     info->memory       = irecv[3];
1285     info->mallocs      = irecv[4];
1286   } else {
1287     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",flag);
1288   }
1289   info->rows_global       = (PetscReal)matin->M;
1290   info->columns_global    = (PetscReal)matin->N;
1291   info->rows_local        = (PetscReal)matin->m;
1292   info->columns_local     = (PetscReal)matin->n;
1293   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1294   info->fill_ratio_needed = 0;
1295   info->factor_mallocs    = 0;
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1301 int MatSetOption_MPISBAIJ(Mat A,MatOption op)
1302 {
1303   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
1304   int         ierr;
1305 
1306   PetscFunctionBegin;
1307   switch (op) {
1308   case MAT_NO_NEW_NONZERO_LOCATIONS:
1309   case MAT_YES_NEW_NONZERO_LOCATIONS:
1310   case MAT_COLUMNS_UNSORTED:
1311   case MAT_COLUMNS_SORTED:
1312   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1313   case MAT_KEEP_ZEROED_ROWS:
1314   case MAT_NEW_NONZERO_LOCATION_ERR:
1315     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1316     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1317     break;
1318   case MAT_ROW_ORIENTED:
1319     a->roworiented = PETSC_TRUE;
1320     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1321     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1322     break;
1323   case MAT_ROWS_SORTED:
1324   case MAT_ROWS_UNSORTED:
1325   case MAT_YES_NEW_DIAGONALS:
1326     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1327     break;
1328   case MAT_COLUMN_ORIENTED:
1329     a->roworiented = PETSC_FALSE;
1330     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1331     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1332     break;
1333   case MAT_IGNORE_OFF_PROC_ENTRIES:
1334     a->donotstash = PETSC_TRUE;
1335     break;
1336   case MAT_NO_NEW_DIAGONALS:
1337     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1338   case MAT_USE_HASH_TABLE:
1339     a->ht_flag = PETSC_TRUE;
1340     break;
1341   case MAT_NOT_SYMMETRIC:
1342   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1343   case MAT_HERMITIAN:
1344     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1345   case MAT_SYMMETRIC:
1346   case MAT_STRUCTURALLY_SYMMETRIC:
1347   case MAT_NOT_HERMITIAN:
1348   case MAT_SYMMETRY_ETERNAL:
1349   case MAT_NOT_SYMMETRY_ETERNAL:
1350     break;
1351   default:
1352     SETERRQ(PETSC_ERR_SUP,"unknown option");
1353   }
1354   PetscFunctionReturn(0);
1355 }
1356 
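/* Added note: for a symmetric matrix the transpose equals the matrix itself, so
   MatTranspose_MPISBAIJ() just returns a duplicate. */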
1357 #undef __FUNCT__
1358 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1359 int MatTranspose_MPISBAIJ(Mat A,Mat *B)
1360 {
1361   int ierr;
1362   PetscFunctionBegin;
1363   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1364   PetscFunctionReturn(0);
1365 }
1366 
1367 #undef __FUNCT__
1368 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1369 int MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1370 {
1371   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1372   Mat         a = baij->A,b = baij->B;
1373   int         ierr,s1,s2,s3;
1374 
1375   PetscFunctionBegin;
1376   if (ll != rr) {
1377     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1378   }
1379   ierr = MatGetLocalSize(mat,&s2,&s3);CHKERRQ(ierr);
1380   if (rr) {
1381     ierr = VecGetLocalSize(rr,&s1);CHKERRQ(ierr);
1382     if (s1!=s3) SETERRQ(PETSC_ERR_ARG_SIZ,"right vector non-conforming local size");
1383     /* Overlap communication with computation. */
1384     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1385 
1386     /* left scale the off-diagonal block (ll == rr was checked above) */
1387     ierr = VecGetLocalSize(ll,&s1);CHKERRQ(ierr);
1388     if (s1!=s2) SETERRQ(PETSC_ERR_ARG_SIZ,"left vector non-conforming local size");
1389     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1390 
1391     /* scale the diagonal block */
1392     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1393 
1394     /* finish the scatter, then right scale the off-diagonal block */
1395     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1396     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1397   }
1398 
1399   PetscFunctionReturn(0);
1400 }
1401 
1402 #undef __FUNCT__
1403 #define __FUNCT__ "MatZeroRows_MPISBAIJ"
1404 int MatZeroRows_MPISBAIJ(Mat A,IS is,const PetscScalar *diag)
1405 {
1406   PetscFunctionBegin;
1407   SETERRQ(PETSC_ERR_SUP,"No support for this function yet");
1408 }
1409 
1410 #undef __FUNCT__
1411 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1412 int MatPrintHelp_MPISBAIJ(Mat A)
1413 {
1414   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1415   MPI_Comm    comm = A->comm;
1416   static int  called = 0;
1417   int         ierr;
1418 
1419   PetscFunctionBegin;
1420   if (!a->rank) {
1421     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1422   }
1423   if (called) {PetscFunctionReturn(0);} else called = 1;
1424   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1425   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1426   PetscFunctionReturn(0);
1427 }
1428 
1429 #undef __FUNCT__
1430 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1431 int MatSetUnfactored_MPISBAIJ(Mat A)
1432 {
1433   Mat_MPISBAIJ *a   = (Mat_MPISBAIJ*)A->data;
1434   int         ierr;
1435 
1436   PetscFunctionBegin;
1437   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1438   PetscFunctionReturn(0);
1439 }
1440 
1441 static int MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1442 
1443 #undef __FUNCT__
1444 #define __FUNCT__ "MatEqual_MPISBAIJ"
1445 int MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1446 {
1447   Mat_MPISBAIJ *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1448   Mat         a,b,c,d;
1449   PetscTruth  flg;
1450   int         ierr;
1451 
1452   PetscFunctionBegin;
1453   a = matA->A; b = matA->B;
1454   c = matB->A; d = matB->B;
1455 
1456   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1457   if (flg == PETSC_TRUE) {
1458     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1459   }
1460   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1461   PetscFunctionReturn(0);
1462 }
1463 
1464 #undef __FUNCT__
1465 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1466 int MatSetUpPreallocation_MPISBAIJ(Mat A)
1467 {
1468   int        ierr;
1469 
1470   PetscFunctionBegin;
1471   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 #undef __FUNCT__
1476 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1477 int MatGetSubMatrices_MPISBAIJ(Mat A,int n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1478 {
1479   int        i,ierr;
1480   PetscTruth flg;
1481   PetscFunctionBegin;
1482   for (i=0; i<n; i++) {
1483     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1484     if (!flg) {
1485       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1486     }
1487   }
1488   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1489   PetscFunctionReturn(0);
1490 }
1491 
1492 
1493 /* -------------------------------------------------------------------*/
1494 static struct _MatOps MatOps_Values = {
1495        MatSetValues_MPISBAIJ,
1496        MatGetRow_MPISBAIJ,
1497        MatRestoreRow_MPISBAIJ,
1498        MatMult_MPISBAIJ,
1499 /* 4*/ MatMultAdd_MPISBAIJ,
1500        MatMultTranspose_MPISBAIJ,
1501        MatMultTransposeAdd_MPISBAIJ,
1502        0,
1503        0,
1504        0,
1505 /*10*/ 0,
1506        0,
1507        0,
1508        MatRelax_MPISBAIJ,
1509        MatTranspose_MPISBAIJ,
1510 /*15*/ MatGetInfo_MPISBAIJ,
1511        MatEqual_MPISBAIJ,
1512        MatGetDiagonal_MPISBAIJ,
1513        MatDiagonalScale_MPISBAIJ,
1514        MatNorm_MPISBAIJ,
1515 /*20*/ MatAssemblyBegin_MPISBAIJ,
1516        MatAssemblyEnd_MPISBAIJ,
1517        0,
1518        MatSetOption_MPISBAIJ,
1519        MatZeroEntries_MPISBAIJ,
1520 /*25*/ MatZeroRows_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1526        0,
1527        0,
1528        0,
1529        0,
1530 /*35*/ MatDuplicate_MPISBAIJ,
1531        0,
1532        0,
1533        0,
1534        0,
1535 /*40*/ 0,
1536        MatGetSubMatrices_MPISBAIJ,
1537        MatIncreaseOverlap_MPISBAIJ,
1538        MatGetValues_MPISBAIJ,
1539        0,
1540 /*45*/ MatPrintHelp_MPISBAIJ,
1541        MatScale_MPISBAIJ,
1542        0,
1543        0,
1544        0,
1545 /*50*/ MatGetBlockSize_MPISBAIJ,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*55*/ 0,
1551        0,
1552        MatSetUnfactored_MPISBAIJ,
1553        0,
1554        MatSetValuesBlocked_MPISBAIJ,
1555 /*60*/ 0,
1556        0,
1557        0,
1558        MatGetPetscMaps_Petsc,
1559        0,
1560 /*65*/ 0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*70*/ MatGetRowMax_MPISBAIJ,
1566        0,
1567        0,
1568        0,
1569        0,
1570 /*75*/ 0,
1571        0,
1572        0,
1573        0,
1574        0,
1575 /*80*/ 0,
1576        0,
1577        0,
1578        0,
1579 /*85*/ MatLoad_MPISBAIJ
1580 };
1581 
1582 
1583 EXTERN_C_BEGIN
1584 #undef __FUNCT__
1585 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1586 int MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1587 {
1588   PetscFunctionBegin;
1589   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1590   *iscopy = PETSC_FALSE;
1591   PetscFunctionReturn(0);
1592 }
1593 EXTERN_C_END
1594 
1595 EXTERN_C_BEGIN
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1598 int MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,int bs,int d_nz,int *d_nnz,int o_nz,int *o_nnz)
1599 {
1600   Mat_MPISBAIJ *b;
1601   int          ierr,i,mbs,Mbs;
1602 
1603   PetscFunctionBegin;
1604   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1605 
1606   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1607   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1608   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1609   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %d",d_nz);
1610   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %d",o_nz);
1611   if (d_nnz) {
1612     for (i=0; i<B->m/bs; i++) {
1613       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %d value %d",i,d_nnz[i]);
1614     }
1615   }
1616   if (o_nnz) {
1617     for (i=0; i<B->m/bs; i++) {
1618       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %d value %d",i,o_nnz[i]);
1619     }
1620   }
1621   B->preallocated = PETSC_TRUE;
1622   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1623   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1624   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1625   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1626 
1627   b   = (Mat_MPISBAIJ*)B->data;
1628   mbs = B->m/bs;
1629   Mbs = B->M/bs;
1630   if (mbs*bs != B->m) {
1631     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %d must be divisible by the blocksize %d",B->m,bs);
1632   }
1633 
1634   b->bs  = bs;
1635   b->bs2 = bs*bs;
1636   b->mbs = mbs;
1637   b->nbs = mbs;
1638   b->Mbs = Mbs;
1639   b->Nbs = Mbs;
1640 
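  /* gather each process's block-row count, then prefix-sum so that
     rowners[rank] .. rowners[rank+1] delimit the block rows owned by this process */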
1641   ierr = MPI_Allgather(&b->mbs,1,MPI_INT,b->rowners+1,1,MPI_INT,B->comm);CHKERRQ(ierr);
1642   b->rowners[0]    = 0;
1643   for (i=2; i<=b->size; i++) {
1644     b->rowners[i] += b->rowners[i-1];
1645   }
1646   b->rstart    = b->rowners[b->rank];
1647   b->rend      = b->rowners[b->rank+1];
1648   b->cstart    = b->rstart;
1649   b->cend      = b->rend;
1650   for (i=0; i<=b->size; i++) {
1651     b->rowners_bs[i] = b->rowners[i]*bs;
1652   }
1653   b->rstart_bs = b->rstart*bs;
1654   b->rend_bs   = b->rend*bs;
1655 
1656   b->cstart_bs = b->cstart*bs;
1657   b->cend_bs   = b->cend*bs;
1658 
1659   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1660   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1661   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1662   PetscLogObjectParent(B,b->A);
1663 
1664   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1665   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1666   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1667   PetscLogObjectParent(B,b->B);
1668 
1669   /* build cache for off array entries formed */
1670   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1671 
1672   PetscFunctionReturn(0);
1673 }
1674 EXTERN_C_END
1675 
1676 /*MC
1677    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1678    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1679 
1680    Options Database Keys:
1681 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1682 
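   Example (a minimal sketch; error checking with ierr/CHKERRQ() omitted):
.vb
      Mat A;
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);
.ve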
1683   Level: beginner
1684 
1685 .seealso: MatCreateMPISBAIJ
1686 M*/
1687 
1688 EXTERN_C_BEGIN
1689 #undef __FUNCT__
1690 #define __FUNCT__ "MatCreate_MPISBAIJ"
1691 int MatCreate_MPISBAIJ(Mat B)
1692 {
1693   Mat_MPISBAIJ *b;
1694   int          ierr;
1695   PetscTruth   flg;
1696 
1697   PetscFunctionBegin;
1698 
1699   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1700   B->data = (void*)b;
1701   ierr    = PetscMemzero(b,sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1702   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1703 
1704   B->ops->destroy    = MatDestroy_MPISBAIJ;
1705   B->ops->view       = MatView_MPISBAIJ;
1706   B->mapping    = 0;
1707   B->factor     = 0;
1708   B->assembled  = PETSC_FALSE;
1709 
1710   B->insertmode = NOT_SET_VALUES;
1711   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1712   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1713 
1714   /* build local table of row and column ownerships */
1715   ierr          = PetscMalloc(3*(b->size+2)*sizeof(int),&b->rowners);CHKERRQ(ierr);
1716   b->cowners    = b->rowners + b->size + 2;
1717   b->rowners_bs = b->cowners + b->size + 2;
1718   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(int)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1719 
1720   /* build cache for off array entries formed */
1721   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1722   b->donotstash  = PETSC_FALSE;
1723   b->colmap      = PETSC_NULL;
1724   b->garray      = PETSC_NULL;
1725   b->roworiented = PETSC_TRUE;
1726 
1727 #if defined(PETSC_USE_MAT_SINGLE)
1728   /* stuff for MatSetValues_XXX in single precision */
1729   b->setvalueslen     = 0;
1730   b->setvaluescopy    = PETSC_NULL;
1731 #endif
1732 
1733   /* stuff used in block assembly */
1734   b->barray       = 0;
1735 
1736   /* stuff used for matrix vector multiply */
1737   b->lvec         = 0;
1738   b->Mvctx        = 0;
1739   b->slvec0       = 0;
1740   b->slvec0b      = 0;
1741   b->slvec1       = 0;
1742   b->slvec1a      = 0;
1743   b->slvec1b      = 0;
1744   b->sMvctx       = 0;
1745 
1746   /* stuff for MatGetRow() */
1747   b->rowindices   = 0;
1748   b->rowvalues    = 0;
1749   b->getrowactive = PETSC_FALSE;
1750 
1751   /* hash table stuff */
1752   b->ht           = 0;
1753   b->hd           = 0;
1754   b->ht_size      = 0;
1755   b->ht_flag      = PETSC_FALSE;
1756   b->ht_fact      = 0;
1757   b->ht_total_ct  = 0;
1758   b->ht_insert_ct = 0;
1759 
1760   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1761   if (flg) {
1762     PetscReal fact = 1.39;
1763     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1764     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1765     if (fact <= 1.0) fact = 1.39;
1766     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1767     PetscLogInfo(0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact);
1768   }
1769   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1770                                      "MatStoreValues_MPISBAIJ",
1771                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1772   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1773                                      "MatRetrieveValues_MPISBAIJ",
1774                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1776                                      "MatGetDiagonalBlock_MPISBAIJ",
1777                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1779                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1780                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1781   PetscFunctionReturn(0);
1782 }
1783 EXTERN_C_END
1784 
1785 /*MC
1786    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1787 
1788    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1789    and MATMPISBAIJ otherwise.
1790 
1791    Options Database Keys:
1792 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1793 
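   Example (a minimal sketch; the communicator size then selects the actual type):
.vb
      Mat A;
      MatCreate(comm,m,n,M,N,&A);
      MatSetType(A,MATSBAIJ);    /* or -mat_type sbaij with MatSetFromOptions() */
.ve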
1794   Level: beginner
1795 
1796 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1797 M*/
1798 
1799 EXTERN_C_BEGIN
1800 #undef __FUNCT__
1801 #define __FUNCT__ "MatCreate_SBAIJ"
1802 int MatCreate_SBAIJ(Mat A) {
1803   int ierr,size;
1804 
1805   PetscFunctionBegin;
1806   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1807   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1808   if (size == 1) {
1809     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1810   } else {
1811     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1812   }
1813   PetscFunctionReturn(0);
1814 }
1815 EXTERN_C_END
1816 
1817 #undef __FUNCT__
1818 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1819 /*@C
1820    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1821    the user should preallocate the matrix storage by setting the parameters
1822    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1823    performance can be increased by more than a factor of 50.
1824 
1825    Collective on Mat
1826 
1827    Input Parameters:
1828 +  A - the matrix
1829 .  bs   - size of the blocks
1830 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1831            submatrix  (same for all local rows)
1832 .  d_nnz - array containing the number of block nonzeros in the various block rows
1833            in the upper triangular and diagonal part of the diagonal portion of the local
1834            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1835            for the diagonal entry even if it is zero.
1836 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1837            submatrix (same for all local rows).
1838 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1839            off-diagonal portion of the local submatrix (possibly different for
1840            each block row) or PETSC_NULL.
1841 
1842 
1843    Options Database Keys:
1844 +   -mat_no_unroll - uses code that does not unroll the loops in the
1845                      block calculations (much slower)
1846 -   -mat_block_size - size of the blocks to use
1847 
1848    Notes:
1849 
1850    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1851    then it must be used on all processors that share the object for that argument.
1852 
1853    Storage Information:
1854    For a square global matrix we define each processor's diagonal portion
1855    to be its local rows and the corresponding columns (a square submatrix);
1856    each processor's off-diagonal portion encompasses the remainder of the
1857    local matrix (a rectangular submatrix).
1858 
1859    The user can specify preallocated storage for the diagonal part of
1860    the local submatrix with either d_nz or d_nnz (not both).  Set
1861    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1862    memory allocation.  Likewise, specify preallocated storage for the
1863    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1864 
1865    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1866    the figure below we depict these three local rows and all columns (0-11).
1867 
1868 .vb
1869            0 1 2 3 4 5 6 7 8 9 10 11
1870           -------------------
1871    row 3  |  o o o d d d o o o o o o
1872    row 4  |  o o o d d d o o o o o o
1873    row 5  |  o o o d d d o o o o o o
1874           -------------------
1875 .ve
1876 
1877    Thus, any entries in the d locations are stored in the d (diagonal)
1878    submatrix, and any entries in the o locations are stored in the
1879    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1880    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1881 
1882    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1883    plus the diagonal part of the d matrix,
1884    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1885    In general, for PDE problems in which most nonzeros are near the diagonal,
1886    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1887    or you will get TERRIBLE performance; see the users' manual chapter on
1888    matrices.
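
   Example (a sketch only: it assumes roughly 3 block nonzeros per block row in the
   upper triangular part of the d matrix and 1 in the o matrix; adapt the counts
   to your own sparsity pattern):
.vb
      MatCreate(comm,m,m,M,M,&A);
      MatSetType(A,MATMPISBAIJ);
      MatMPISBAIJSetPreallocation(A,bs,3,PETSC_NULL,1,PETSC_NULL);
.ve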
1889 
1890    Level: intermediate
1891 
1892 .keywords: matrix, block, aij, compressed row, sparse, parallel
1893 
1894 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1895 @*/
1896 int MatMPISBAIJSetPreallocation(Mat B,int bs,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[])
1897 {
1898   int ierr,(*f)(Mat,int,int,const int[],int,const int[]);
1899 
1900   PetscFunctionBegin;
1901   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1902   if (f) {
1903     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1904   }
1905   PetscFunctionReturn(0);
1906 }
1907 
1908 #undef __FUNCT__
1909 #define __FUNCT__ "MatCreateMPISBAIJ"
1910 /*@C
1911    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1912    (block compressed row).  For good matrix assembly performance
1913    the user should preallocate the matrix storage by setting the parameters
1914    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1915    performance can be increased by more than a factor of 50.
1916 
1917    Collective on MPI_Comm
1918 
1919    Input Parameters:
1920 +  comm - MPI communicator
1921 .  bs   - size of the blocks
1922 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1923            This value should be the same as the local size used in creating the
1924            y vector for the matrix-vector product y = Ax.
1925 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1926            This value should be the same as the local size used in creating the
1927            x vector for the matrix-vector product y = Ax.
1928 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1929 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1930 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1931            submatrix  (same for all local rows)
1932 .  d_nnz - array containing the number of block nonzeros in the various block rows
1933            in the upper triangular and diagonal part of the diagonal portion of the local
1934            submatrix (possibly different for each block row) or PETSC_NULL.
1935            You must leave room for the diagonal entry even if it is zero.
1936 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1937            submatrix (same for all local rows).
1938 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1939            off-diagonal portion of the local submatrix (possibly different for
1940            each block row) or PETSC_NULL.
1941 
1942    Output Parameter:
1943 .  A - the matrix
1944 
1945    Options Database Keys:
1946 +   -mat_no_unroll - uses code that does not unroll the loops in the
1947                      block calculations (much slower)
1948 .   -mat_block_size - size of the blocks to use
1949 -   -mat_mpi - use the parallel matrix data structures even on one processor
1950                (defaults to using SeqSBAIJ format on one processor)
1951 
1952    Notes:
1953    The user MUST specify either the local or global matrix dimensions
1954    (possibly both).
1955 
1956    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1957    then it must be used on all processors that share the object for that argument.
1958 
1959    Storage Information:
1960    For a square global matrix we define each processor's diagonal portion
1961    to be its local rows and the corresponding columns (a square submatrix);
1962    each processor's off-diagonal portion encompasses the remainder of the
1963    local matrix (a rectangular submatrix).
1964 
1965    The user can specify preallocated storage for the diagonal part of
1966    the local submatrix with either d_nz or d_nnz (not both).  Set
1967    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1968    memory allocation.  Likewise, specify preallocated storage for the
1969    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1970 
1971    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1972    the figure below we depict these three local rows and all columns (0-11).
1973 
1974 .vb
1975            0 1 2 3 4 5 6 7 8 9 10 11
1976           -------------------
1977    row 3  |  o o o d d d o o o o o o
1978    row 4  |  o o o d d d o o o o o o
1979    row 5  |  o o o d d d o o o o o o
1980           -------------------
1981 .ve
1982 
1983    Thus, any entries in the d locations are stored in the d (diagonal)
1984    submatrix, and any entries in the o locations are stored in the
1985    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1986    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1987 
1988    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1989    plus the diagonal part of the d matrix,
1990    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1991    In general, for PDE problems in which most nonzeros are near the diagonal,
1992    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1993    or you will get TERRIBLE performance; see the users' manual chapter on
1994    matrices.
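
   Example (a sketch; PETSC_DECIDE lets PETSc choose the local sizes, and the
   counts 3 and 1 are placeholders for your actual sparsity pattern):
.vb
      Mat A;
      MatCreateMPISBAIJ(PETSC_COMM_WORLD,bs,PETSC_DECIDE,PETSC_DECIDE,M,M,
                        3,PETSC_NULL,1,PETSC_NULL,&A);
.ve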
1995 
1996    Level: intermediate
1997 
1998 .keywords: matrix, block, aij, compressed row, sparse, parallel
1999 
2000 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2001 @*/
2002 
2003 int MatCreateMPISBAIJ(MPI_Comm comm,int bs,int m,int n,int M,int N,int d_nz,const int d_nnz[],int o_nz,const int o_nnz[],Mat *A)
2004 {
2005   int ierr,size;
2006 
2007   PetscFunctionBegin;
2008   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
2009   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2010   if (size > 1) {
2011     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2012     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2013   } else {
2014     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2015     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2016   }
2017   PetscFunctionReturn(0);
2018 }
2019 
2020 
2021 #undef __FUNCT__
2022 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2023 static int MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2024 {
2025   Mat          mat;
2026   Mat_MPISBAIJ *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2027   int          ierr,len=0,nt,bs=oldmat->bs,mbs=oldmat->mbs;
2028   PetscScalar  *array;
2029 
2030   PetscFunctionBegin;
2031   *newmat       = 0;
2032   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
2033   ierr = MatSetType(mat,MATMPISBAIJ);CHKERRQ(ierr);
2034 
2035   /* ierr = PetscMemcpy(mat->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr); */ /*-- cause error? */
2036   mat->factor       = matin->factor;
2037   mat->preallocated = PETSC_TRUE;
2038   mat->assembled    = PETSC_TRUE;
2039   mat->insertmode   = NOT_SET_VALUES;
2040 
2041   a = (Mat_MPISBAIJ*)mat->data;
2042   a->bs  = oldmat->bs;
2043   a->bs2 = oldmat->bs2;
2044   a->mbs = oldmat->mbs;
2045   a->nbs = oldmat->nbs;
2046   a->Mbs = oldmat->Mbs;
2047   a->Nbs = oldmat->Nbs;
2048 
2049   a->rstart       = oldmat->rstart;
2050   a->rend         = oldmat->rend;
2051   a->cstart       = oldmat->cstart;
2052   a->cend         = oldmat->cend;
2053   a->size         = oldmat->size;
2054   a->rank         = oldmat->rank;
2055   a->donotstash   = oldmat->donotstash;
2056   a->roworiented  = oldmat->roworiented;
2057   a->rowindices   = 0;
2058   a->rowvalues    = 0;
2059   a->getrowactive = PETSC_FALSE;
2060   a->barray       = 0;
2061   a->rstart_bs    = oldmat->rstart_bs;
2062   a->rend_bs      = oldmat->rend_bs;
2063   a->cstart_bs    = oldmat->cstart_bs;
2064   a->cend_bs      = oldmat->cend_bs;
2065 
2066   /* hash table stuff */
2067   a->ht           = 0;
2068   a->hd           = 0;
2069   a->ht_size      = 0;
2070   a->ht_flag      = oldmat->ht_flag;
2071   a->ht_fact      = oldmat->ht_fact;
2072   a->ht_total_ct  = 0;
2073   a->ht_insert_ct = 0;
2074 
2075   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(int));CHKERRQ(ierr);
2076   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2077   ierr = MatStashCreate_Private(matin->comm,oldmat->bs,&mat->bstash);CHKERRQ(ierr);
2078   if (oldmat->colmap) {
2079 #if defined (PETSC_USE_CTABLE)
2080     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2081 #else
2082     ierr = PetscMalloc((a->Nbs)*sizeof(int),&a->colmap);CHKERRQ(ierr);
2083     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(int));
2084     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(int));CHKERRQ(ierr);
2085 #endif
2086   } else a->colmap = 0;
2087 
2088   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2089     ierr = PetscMalloc(len*sizeof(int),&a->garray);CHKERRQ(ierr);
2090     PetscLogObjectMemory(mat,len*sizeof(int));
2091     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(int));CHKERRQ(ierr);
2092   } else a->garray = 0;
2093 
2094   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2095   PetscLogObjectParent(mat,a->lvec);
2096   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2097   PetscLogObjectParent(mat,a->Mvctx);
2098 
2099   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2100   PetscLogObjectParent(mat,a->slvec0);
2101   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2102   PetscLogObjectParent(mat,a->slvec1);
2103 
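  /* rebuild slvec1a/slvec1b (and slvec0b) as sequential vectors sharing the arrays
     of slvec1 and slvec0: the first bs*mbs entries hold the local part, the
     remaining entries the ghost part */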
2104   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2105   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2106   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2107   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2108   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2109   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2110   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2111   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2114   PetscLogObjectParent(mat,a->slvec0b);
2115   PetscLogObjectParent(mat,a->slvec1a);
2116   PetscLogObjectParent(mat,a->slvec1b);
2117 
2118   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2119   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2120   a->sMvctx = oldmat->sMvctx;
2121   PetscLogObjectParent(mat,a->sMvctx);
2122 
2123   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2124   PetscLogObjectParent(mat,a->A);
2125   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2126   PetscLogObjectParent(mat,a->B);
2127   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2128   *newmat = mat;
2129   PetscFunctionReturn(0);
2130 }
2131 
2132 #include "petscsys.h"
2133 
2134 #undef __FUNCT__
2135 #define __FUNCT__ "MatLoad_MPISBAIJ"
2136 int MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2137 {
2138   Mat          A;
2139   int          i,nz,ierr,j,rstart,rend,fd;
2140   PetscScalar  *vals,*buf;
2141   MPI_Comm     comm = ((PetscObject)viewer)->comm;
2142   MPI_Status   status;
2143   int          header[4],rank,size,*rowlengths = 0,M,N,m,*rowners,*browners,maxnz,*cols;
2144   int          *locrowlens,*sndcounts = 0,*procsnz = 0,jj,*mycols,*ibuf;
2145   int          tag = ((PetscObject)viewer)->tag,bs=1,Mbs,mbs,extra_rows;
2146   int          *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2147   int          dcount,kmax,k,nzcount,tmp;
2148 
2149   PetscFunctionBegin;
2150   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2151 
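  /* overall flow: process 0 reads the binary file and ships each process its piece:
     (1) header and row lengths, (2) column indices, (3) diagonal/off-diagonal
     preallocation counts derived from those indices, (4) numerical values */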
2152   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2153   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2154   if (!rank) {
2155     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2156     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2157     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2158     if (header[3] < 0) {
2159       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2160     }
2161   }
2162 
2163   ierr = MPI_Bcast(header+1,3,MPI_INT,0,comm);CHKERRQ(ierr);
2164   M = header[1]; N = header[2];
2165 
2166   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2167 
2168   /*
2169      This code adds extra rows to make sure the number of rows is
2170      divisible by the blocksize
2171   */
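  /* e.g. for M = 10, bs = 3: Mbs = 10/3 = 3 and extra_rows = 3 - 10 + 9 = 2, so
     Mbs becomes 4 and two padded rows (with a unit diagonal) are appended */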
2172   Mbs        = M/bs;
2173   extra_rows = bs - M + bs*(Mbs);
2174   if (extra_rows == bs) extra_rows = 0;
2175   else                  Mbs++;
2176   if (extra_rows && !rank) {
2177     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2178   }
2179 
2180   /* determine ownership of all rows */
2181   mbs        = Mbs/size + ((Mbs % size) > rank);
2182   m          = mbs*bs;
2183   ierr       = PetscMalloc(2*(size+2)*sizeof(int),&rowners);CHKERRQ(ierr);
2184   browners   = rowners + size + 1;
2185   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2186   rowners[0] = 0;
2187   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2188   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2189   rstart = rowners[rank];
2190   rend   = rowners[rank+1];
2191 
2192   /* distribute row lengths to all processors */
2193   ierr = PetscMalloc((rend-rstart)*bs*sizeof(int),&locrowlens);CHKERRQ(ierr);
2194   if (!rank) {
2195     ierr = PetscMalloc((M+extra_rows)*sizeof(int),&rowlengths);CHKERRQ(ierr);
2196     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2197     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2198     ierr = PetscMalloc(size*sizeof(int),&sndcounts);CHKERRQ(ierr);
2199     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2200     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2201     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2202   } else {
2203     ierr = MPI_Scatterv(0,0,0,MPI_INT,locrowlens,(rend-rstart)*bs,MPI_INT,0,comm);CHKERRQ(ierr);
2204   }
2205 
2206   if (!rank) {   /* procs[0] */
2207     /* calculate the number of nonzeros on each processor */
2208     ierr = PetscMalloc(size*sizeof(int),&procsnz);CHKERRQ(ierr);
2209     ierr = PetscMemzero(procsnz,size*sizeof(int));CHKERRQ(ierr);
2210     for (i=0; i<size; i++) {
2211       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2212         procsnz[i] += rowlengths[j];
2213       }
2214     }
2215     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2216 
2217     /* determine max buffer needed and allocate it */
2218     maxnz = 0;
2219     for (i=0; i<size; i++) {
2220       maxnz = PetscMax(maxnz,procsnz[i]);
2221     }
2222     ierr = PetscMalloc(maxnz*sizeof(int),&cols);CHKERRQ(ierr);
2223 
2224     /* read in my part of the matrix column indices  */
2225     nz     = procsnz[0];
2226     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2227     mycols = ibuf;
2228     if (size == 1)  nz -= extra_rows;
2229     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2230     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2231 
2232     /* read in each other process's part (except the last) and ship it off */
2233     for (i=1; i<size-1; i++) {
2234       nz   = procsnz[i];
2235       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2236       ierr = MPI_Send(cols,nz,MPI_INT,i,tag,comm);CHKERRQ(ierr);
2237     }
2238     /* read in the stuff for the last proc */
2239     if (size != 1) {
2240       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2241       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2242       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2243       ierr = MPI_Send(cols,nz+extra_rows,MPI_INT,size-1,tag,comm);CHKERRQ(ierr);
2244     }
2245     ierr = PetscFree(cols);CHKERRQ(ierr);
2246   } else {  /* procs[i], i>0 */
2247     /* determine buffer space needed for message */
2248     nz = 0;
2249     for (i=0; i<m; i++) {
2250       nz += locrowlens[i];
2251     }
2252     ierr   = PetscMalloc(nz*sizeof(int),&ibuf);CHKERRQ(ierr);
2253     mycols = ibuf;
2254     /* receive message of column indices*/
2255     ierr = MPI_Recv(mycols,nz,MPI_INT,0,tag,comm,&status);CHKERRQ(ierr);
2256     ierr = MPI_Get_count(&status,MPI_INT,&maxnz);CHKERRQ(ierr);
2257     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2258   }
2259 
2260   /* loop over local rows, determining number of off diagonal entries */
2261   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(int),&dlens);CHKERRQ(ierr);
2262   odlens   = dlens + (rend-rstart);
2263   ierr     = PetscMalloc(3*Mbs*sizeof(int),&mask);CHKERRQ(ierr);
2264   ierr     = PetscMemzero(mask,3*Mbs*sizeof(int));CHKERRQ(ierr);
2265   masked1  = mask    + Mbs;
2266   masked2  = masked1 + Mbs;
2267   rowcount = 0; nzcount = 0;
2268   for (i=0; i<mbs; i++) {
2269     dcount  = 0;
2270     odcount = 0;
2271     for (j=0; j<bs; j++) {
2272       kmax = locrowlens[rowcount];
2273       for (k=0; k<kmax; k++) {
2274         tmp = mycols[nzcount++]/bs; /* block col. index */
2275         if (!mask[tmp]) {
2276           mask[tmp] = 1;
2277           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2278           else masked1[dcount++] = tmp; /* entry in diag portion */
2279         }
2280       }
2281       rowcount++;
2282     }
2283 
2284     dlens[i]  = dcount;  /* d_nzz[i] */
2285     odlens[i] = odcount; /* o_nzz[i] */
2286 
2287     /* zero out the mask elements we set */
2288     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2289     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2290   }
2291 
2292   /* create our matrix */
2293   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2294   ierr = MatSetType(A,type);CHKERRQ(ierr);
2295   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2296   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2297 
2298   if (!rank) {
2299     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2300     /* read in my part of the matrix numerical values  */
2301     nz = procsnz[0];
2302     vals = buf;
2303     mycols = ibuf;
2304     if (size == 1)  nz -= extra_rows;
2305     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2306     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2307 
2308     /* insert into matrix */
2309     jj      = rstart*bs;
2310     for (i=0; i<m; i++) {
2311       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2312       mycols += locrowlens[i];
2313       vals   += locrowlens[i];
2314       jj++;
2315     }
2316 
2317     /* read in other processors (except the last one) and ship out */
2318     for (i=1; i<size-1; i++) {
2319       nz   = procsnz[i];
2320       vals = buf;
2321       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2322       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2323     }
2324     /* the last proc */
2325     if (size != 1){
2326       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 here; the extra rows are not on the disk */
2327       vals = buf;
2328       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2329       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2330       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2331     }
2332     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2333 
2334   } else {
2335     /* receive numeric values */
2336     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2337 
2338     /* receive message of values*/
2339     vals   = buf;
2340     mycols = ibuf;
2341     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2342     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2343     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2344 
2345     /* insert into matrix */
2346     jj      = rstart*bs;
2347     for (i=0; i<m; i++) {
2348       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2349       mycols += locrowlens[i];
2350       vals   += locrowlens[i];
2351       jj++;
2352     }
2353   }
2354 
2355   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2356   ierr = PetscFree(buf);CHKERRQ(ierr);
2357   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2358   ierr = PetscFree(rowners);CHKERRQ(ierr);
2359   ierr = PetscFree(dlens);CHKERRQ(ierr);
2360   ierr = PetscFree(mask);CHKERRQ(ierr);
2361   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2362   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2363   *newmat = A;
2364   PetscFunctionReturn(0);
2365 }
2366 
2367 #undef __FUNCT__
2368 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2369 /*@
2370    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2371 
2372    Input Parameters:
2373 +  mat  - the matrix
2374 -  fact - factor
2375 
2376    Collective on Mat
2377 
2378    Level: advanced
2379 
2380   Notes:
2381    This can also be set by the command line option: -mat_use_hash_table fact
2382 
2383 .keywords: matrix, hashtable, factor, HT
2384 
2385 .seealso: MatSetOption()
2386 @*/
2387 int MatMPISBAIJSetHashTableFactor(Mat mat,PetscReal fact)
2388 {
2389   PetscFunctionBegin;
2390   SETERRQ(1,"Function not yet written for SBAIJ format");
2391   /* PetscFunctionReturn(0); */
2392 }
2393 
2394 #undef __FUNCT__
2395 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2396 int MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2397 {
2398   Mat_MPISBAIJ *a = (Mat_MPISBAIJ*)A->data;
2399   Mat_SeqBAIJ  *b = (Mat_SeqBAIJ*)(a->B)->data;
2400   PetscReal    atmp;
2401   PetscReal    *work,*svalues,*rvalues;
2402   int          ierr,i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2403   int          rank,size,*rowners_bs,dest,count,source;
2404   PetscScalar  *va;
2405   MatScalar    *ba;
2406   MPI_Status   stat;
2407 
2408   PetscFunctionBegin;
2409   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2410   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2411 
2412   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2413   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2414 
2415   bs   = a->bs;
2416   mbs  = a->mbs;
2417   Mbs  = a->Mbs;
2418   ba   = b->a;
2419   bi   = b->i;
2420   bj   = b->j;
2425 
2426   /* find ownerships */
2427   rowners_bs = a->rowners_bs;
2433 
2434   /* each proc creates an array to be distributed */
2435   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2436   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
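  /* by symmetry, a column maximum of the upper triangular block B is also a row
     maximum of the (remotely owned) row with that global column index; work[]
     collects these column maxima so they can be shipped to their owners below */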
2437 
2438   /* row_max for B */
2439   if (rank != size-1){
2440     for (i=0; i<mbs; i++) {
2441       ncols = bi[1] - bi[0]; bi++;
2442       brow  = bs*i;
2443       for (j=0; j<ncols; j++){
2444         bcol = bs*(*bj);
2445         for (kcol=0; kcol<bs; kcol++){
2446           col = bcol + kcol;                 /* local col index */
2447           col += rowners_bs[rank+1];      /* global col index */
2449           for (krow=0; krow<bs; krow++){
2450             atmp = PetscAbsScalar(*ba); ba++;
2451             row = brow + krow;    /* local row index */
2453             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2454             if (work[col] < atmp) work[col] = atmp;
2455           }
2456         }
2457         bj++;
2458       }
2459     }
2465 
2466     /* send values to its owners */
2467     for (dest=rank+1; dest<size; dest++){
2468       svalues = work + rowners_bs[dest];
2469       count   = rowners_bs[dest+1]-rowners_bs[dest];
2470       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2475     }
2476   }
2477 
2478   /* receive values */
2479   if (rank){
2480     rvalues = work;
2481     count   = rowners_bs[rank+1]-rowners_bs[rank];
2482     for (source=0; source<rank; source++){
2483       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2484       /* process values */
2485       for (i=0; i<count; i++){
2486         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2487       }
2492     }
2493   }
2494 
2495   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2496   ierr = PetscFree(work);CHKERRQ(ierr);
2497   PetscFunctionReturn(0);
2498 }
2499 
2500 #undef __FUNCT__
2501 #define __FUNCT__ "MatRelax_MPISBAIJ"
2502 int MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2503 {
2504   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2505   int            ierr,mbs=mat->mbs,bs=mat->bs;
2506   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2507   Vec            bb1;
2508 
2509   PetscFunctionBegin;
2510   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2511   if (bs > 1)
2512     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2513 
2514   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2515     if (flag & SOR_ZERO_INITIAL_GUESS) {
2516       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2517       its--;
2518     }
2519 
2520     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
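    /* each outer iteration assembles bb1 = bb - (off-diagonal part of A)*xx with a
       single forward scatter: slvec0 carries -xx and -B^T*xx, slvec1 accumulates
       them on top of bb, and one multadd with B applies the ghost contributions;
       a local symmetric sweep with the diagonal block A then updates xx */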
2521     while (its--){
2522 
2523       /* lower triangular part: slvec0b = B^T*xx (negated below together with slvec0a) */
2524       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2525 
2526       /* copy xx into slvec0a */
2527       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2528       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2529       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2530       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2531 
2532       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2533 
2534       /* copy bb into slvec1a */
2535       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2536       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2537       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2538       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2539 
2540       /* set slvec1b = 0 */
2541       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2542 
2543       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2544       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2545       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2546       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2547 
2548       /* upper triangular part: bb1 = slvec1a + B*slvec1b */
2549       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2550 
2551       /* local diagonal sweep */
2552       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2553     }
2554     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2555   } else {
2556     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2557   }
2558   PetscFunctionReturn(0);
2559 }
2560 
2561 #undef __FUNCT__
2562 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2563 int MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,int its,int lits,Vec xx)
2564 {
2565   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2566   int            ierr;
2567   PetscScalar    mone=-1.0;
2568   Vec            lvec1,bb1;
2569 
2570   PetscFunctionBegin;
2571   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %d and local its %d both positive",its,lits);
2572   if (mat->bs > 1)
2573     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2574 
2575   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2576     if (flag & SOR_ZERO_INITIAL_GUESS) {
2577       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2578       its--;
2579     }
2580 
2581     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2582     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2583     while (its--){
2584       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2585 
2586       /* lower triangular part: bb1 = bb - B^T*xx */
2587       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2588       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2589 
2590       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2591       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2592       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2593 
2594       /* upper triangular part: bb1 = bb1 - B*xx */
2595       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2596       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2597 
2598       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2599 
2600       /* diagonal sweep */
2601       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2602     }
2603     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2604     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2605   } else {
2606     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2607   }
2608   PetscFunctionReturn(0);
2609 }
2610 
2611