xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision b3bf805bc5fa6ada2d57fa222e3d33ae005e5770)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist as a separate routine. Otherwise ..._MatScalar() takes matrix elements in single
26    precision and inserts them directly into the single precision data structure. The function
27    MatSetValuesBlocked_MPISBAIJ() converts the entries into single precision and then calls
28    ..._MatScalar() to put them into the single precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
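/*
   A minimal usage sketch for the store/retrieve pair above (illustrative only,
   with a hypothetical matrix mat; the exact calling pattern is up to the
   application): once the nonzero pattern is frozen, the current numerical
   values can be saved, the matrix modified in place, and the saved values
   restored later.

     ierr = MatSetOption(mat,MAT_NO_NEW_NONZERO_LOCATIONS);CHKERRQ(ierr);
     ierr = MatStoreValues(mat);CHKERRQ(ierr);
     ....  modify the values of mat and use it  ....
     ierr = MatRetrieveValues(mat);CHKERRQ(ierr);
*/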
73 
74 
75 #define CHUNKSIZE  10
76 
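/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) inserts a single scalar
   into the "diagonal" block matrix A: it locates block row row/bs, binary
   searches the sorted block column indices for col/bs, and either updates the
   existing block or, if new nonzeros are allowed, enlarges the storage by
   CHUNKSIZE blocks and shifts the trailing entries up. It relies on local
   variables (rp,ap,rmax,nrow,low,high,...) declared by the calling function.
   MatSetValues_SeqSBAIJ_B_Private below does the same for the off-diagonal
   block matrix B.
*/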
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into the matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (PetscInt*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqSBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into the matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (PetscInt*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
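/*
   Worked example of the index arithmetic used by both macros (illustrative):
   with bs = 2 (hence bs2 = 4), the scalar entry at (row,col) = (5,7) lives in
   block (brow,bcol) = (2,3) at offset (ridx,cidx) = (1,1) within the block,
   and is stored column-major inside the block at ap[bs2*_i + bs*cidx + ridx],
   i.e. at position 2*1 + 1 = 3 of the 4-entry block number _i.
*/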
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #endif
279 
280 /* Only add/insert a(i,j) with i<=j (block indices).
281    Any a(i,j) with i>j input by the user is ignored.
282 */
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
285 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
288   MatScalar      value;
289   PetscTruth     roworiented = baij->roworiented;
290   PetscErrorCode ierr;
291   PetscInt       i,j,row,col;
292   PetscInt       rstart_orig=baij->rstart_bs;
293   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
294   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
295 
296   /* Some variables required by the macros */
297   Mat            A = baij->A;
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
299   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
300   MatScalar      *aa=a->a;
301 
302   Mat            B = baij->B;
303   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
304   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
305   MatScalar     *ba=b->a;
306 
307   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
308   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
309   MatScalar     *ap,*bap;
310 
311   /* for stash */
312   PetscInt      n_loc, *in_loc=0;
313   MatScalar     *v_loc=0;
314 
315   PetscFunctionBegin;
316 
317   if(!baij->donotstash){
318     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
319     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
320   }
321 
322   for (i=0; i<m; i++) {
323     if (im[i] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
326 #endif
327     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
328       row = im[i] - rstart_orig;              /* local row index */
329       for (j=0; j<n; j++) {
330         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
331         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
332           col = in[j] - cstart_orig;          /* local col index */
333           brow = row/bs; bcol = col/bs;
334           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
335           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
336           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
337           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
338         } else if (in[j] < 0) continue;
339 #if defined(PETSC_USE_DEBUG)
340         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
341 #endif
342         else {  /* off-diag entry (B) */
343           if (mat->was_assembled) {
344             if (!baij->colmap) {
345               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
346             }
347 #if defined (PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
349             col  = col - 1;
350 #else
351             col = baij->colmap[in[j]/bs] - 1;
352 #endif
353             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
354               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
355               col =  in[j];
356               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
357               B = baij->B;
358               b = (Mat_SeqBAIJ*)(B)->data;
359               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
360               ba=b->a;
361             } else col += in[j]%bs;
362           } else col = in[j];
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         }
367       }
368     } else {  /* off processor entry */
369       if (!baij->donotstash) {
370         n_loc = 0;
371         for (j=0; j<n; j++){
372           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
373           in_loc[n_loc] = in[j];
374           if (roworiented) {
375             v_loc[n_loc] = v[i*n+j];
376           } else {
377             v_loc[n_loc] = v[j*m+i];
378           }
379           n_loc++;
380         }
381         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
382       }
383     }
384   }
385 
386   if(!baij->donotstash){
387     ierr = PetscFree(in_loc);CHKERRQ(ierr);
388     ierr = PetscFree(v_loc);CHKERRQ(ierr);
389   }
390   PetscFunctionReturn(0);
391 }
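/*
   Usage sketch for the routine above (hypothetical indices and values,
   assuming bs = 1 here since the test is at block granularity): for the
   symmetric format only entries a(i,j) with i <= j need to be set; an entry
   with i > j is silently dropped, so the second call below is a no-op.

     PetscInt    i = 0, j = 3;
     PetscScalar v = 5.0;
     ierr = MatSetValues(mat,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);   stored, since i <= j
     ierr = MatSetValues(mat,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);   ignored, since j > i
*/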
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
395 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
396 {
397   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
398   const MatScalar *value;
399   MatScalar       *barray=baij->barray;
400   PetscTruth      roworiented = baij->roworiented;
401   PetscErrorCode  ierr;
402   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
403   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
404   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
405 
406   PetscFunctionBegin;
407   if(!barray) {
408     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
409     baij->barray = barray;
410   }
411 
412   if (roworiented) {
413     stepval = (n-1)*bs;
414   } else {
415     stepval = (m-1)*bs;
416   }
417   for (i=0; i<m; i++) {
418     if (im[i] < 0) continue;
419 #if defined(PETSC_USE_DEBUG)
420     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
421 #endif
422     if (im[i] >= rstart && im[i] < rend) {
423       row = im[i] - rstart;
424       for (j=0; j<n; j++) {
425         /* If only one block column (row-oriented) or one block row (column-oriented) is passed, the block is already contiguous in v and no copy is required */
426         if ((roworiented) && (n == 1)) {
427           barray = (MatScalar*) v + i*bs2;
428         } else if((!roworiented) && (m == 1)) {
429           barray = (MatScalar*) v + j*bs2;
430         } else { /* Here a copy is required */
431           if (roworiented) {
432             value = v + i*(stepval+bs)*bs + j*bs;
433           } else {
434             value = v + j*(stepval+bs)*bs + i*bs;
435           }
436           for (ii=0; ii<bs; ii++,value+=stepval) {
437             for (jj=0; jj<bs; jj++) {
438               *barray++  = *value++;
439             }
440           }
441           barray -=bs2;
442         }
443 
444         if (in[j] >= cstart && in[j] < cend){
445           col  = in[j] - cstart;
446           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
447         }
448         else if (in[j] < 0) continue;
449 #if defined(PETSC_USE_DEBUG)
450         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
451 #endif
452         else {
453           if (mat->was_assembled) {
454             if (!baij->colmap) {
455               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
456             }
457 
458 #if defined(PETSC_USE_DEBUG)
459 #if defined (PETSC_USE_CTABLE)
460             { PetscInt data;
461               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
462               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463             }
464 #else
465             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
466 #endif
467 #endif
468 #if defined (PETSC_USE_CTABLE)
469 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
470             col  = (col - 1)/bs;
471 #else
472             col = (baij->colmap[in[j]] - 1)/bs;
473 #endif
474             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
475               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
476               col =  in[j];
477             }
478           }
479           else col = in[j];
480           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482       }
483     } else {
484       if (!baij->donotstash) {
485         if (roworiented) {
486           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487         } else {
488           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
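/*
   Usage sketch for blocked insertion (hypothetical data, assuming bs = 2): one
   bs x bs block of values is supplied per (block-row, block-column) pair, laid
   out row by row when the matrix is row-oriented (the default).

     PetscInt    ib = 0, jb = 1;
     PetscScalar vblock[4] = {1.0, 2.0,
                              3.0, 4.0};
     ierr = MatSetValuesBlocked(mat,1,&ib,1,&jb,vblock,INSERT_VALUES);CHKERRQ(ierr);
*/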
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatGetValues_MPISBAIJ"
498 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
499 {
500   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
501   PetscErrorCode ierr;
502   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
503   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
504 
505   PetscFunctionBegin;
506   for (i=0; i<m; i++) {
507     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
508     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
509     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
510       row = idxm[i] - bsrstart;
511       for (j=0; j<n; j++) {
512         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
513         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
514         if (idxn[j] >= bscstart && idxn[j] < bscend){
515           col = idxn[j] - bscstart;
516           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
517         } else {
518           if (!baij->colmap) {
519             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
520           }
521 #if defined (PETSC_USE_CTABLE)
522           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
523           data --;
524 #else
525           data = baij->colmap[idxn[j]/bs]-1;
526 #endif
527           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
528           else {
529             col  = data + idxn[j]%bs;
530             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
531           }
532         }
533       }
534     } else {
535       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
536     }
537   }
538  PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatNorm_MPISBAIJ"
543 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
544 {
545   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
546   PetscErrorCode ierr;
547   PetscReal      sum[2],*lnorm2;
548 
549   PetscFunctionBegin;
550   if (baij->size == 1) {
551     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
552   } else {
553     if (type == NORM_FROBENIUS) {
554       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
555       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
556       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
557       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
558       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
559       ierr = MPI_Allreduce(lnorm2,sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
560       *norm = sqrt(sum[0] + 2*sum[1]);
561       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
562     } else {
563       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
564     }
565   }
566   PetscFunctionReturn(0);
567 }
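/*
   The Frobenius branch above exploits symmetry: each process holds its
   diagonal block A (whose SeqSBAIJ MatNorm accounts for the implicit lower
   triangle) and its off-diagonal blocks B, whose transposed counterparts are
   not stored. Hence, as read from the code,

       ||M||_F = sqrt( sum_p ( ||A_p||_F^2 + 2*||B_p||_F^2 ) )

   which is exactly the sqrt(sum[0] + 2*sum[1]) formed after the reduction.
*/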
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
571 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
572 {
573   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
574   PetscErrorCode ierr;
575   PetscInt       nstash,reallocs;
576   InsertMode     addv;
577 
578   PetscFunctionBegin;
579   if (baij->donotstash) {
580     PetscFunctionReturn(0);
581   }
582 
583   /* make sure all processors are in the same mode, either INSERT_VALUES or ADD_VALUES */
584   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
585   if (addv == (ADD_VALUES|INSERT_VALUES)) {
586     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted values while others added");
587   }
588   mat->insertmode = addv; /* in case this processor had no cache */
589 
590   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
591   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
592   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
593   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);
594   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
595   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
596   PetscFunctionReturn(0);
597 }
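/*
   Sketch of the stash mechanism handled above (hypothetical values, with M
   being the global dimension): entries destined for rows owned by another
   process are cached locally in mat->stash (scalar entries) and mat->bstash
   (blocked entries) during MatSetValues(), and are only communicated during
   assembly, e.g.

     if (!rank) {                      set an entry owned by the last process
       PetscInt    i = M-1, j = M-1;
       PetscScalar v = 1.0;
       ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/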
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
601 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
602 {
603   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
604   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
605   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
606   PetscErrorCode ierr;
607   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
608   PetscInt       *row,*col,other_disassembled;
609   PetscMPIInt    n;
610   PetscTruth     r1,r2,r3;
611   MatScalar      *val;
612   InsertMode     addv = mat->insertmode;
613 
614   PetscFunctionBegin;
615 
616   if (!baij->donotstash) {
617     while (1) {
618       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
619       if (!flg) break;
620 
621       for (i=0; i<n;) {
622         /* Now identify the consecutive vals belonging to the same row */
623         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
624         if (j < n) ncols = j-i;
625         else       ncols = n-i;
626         /* Now assemble all these values with a single function call */
627         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
628         i = j;
629       }
630     }
631     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
632     /* Now process the block-stash. Since the values are stashed column-oriented,
633        temporarily clear the roworiented flags and restore the original flags
634        after the MatSetValuesBlocked() calls */
635     r1 = baij->roworiented;
636     r2 = a->roworiented;
637     r3 = b->roworiented;
638     baij->roworiented = PETSC_FALSE;
639     a->roworiented    = PETSC_FALSE;
640     b->roworiented    = PETSC_FALSE;
641     while (1) {
642       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
643       if (!flg) break;
644 
645       for (i=0; i<n;) {
646         /* Now identify the consecutive vals belonging to the same row */
647         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
648         if (j < n) ncols = j-i;
649         else       ncols = n-i;
650         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
651         i = j;
652       }
653     }
654     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
655     baij->roworiented = r1;
656     a->roworiented    = r2;
657     b->roworiented    = r3;
658   }
659 
660   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
661   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
662 
663   /* determine if any processor has disassembled; if so we must
664      also disassemble ourselves, in order that we may reassemble. */
665   /*
666      if the nonzero structure of submatrix B cannot change then we know that
667      no processor disassembled, and we can skip this check
668   */
669   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
670     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
671     if (mat->was_assembled && !other_disassembled) {
672       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
673     }
674   }
675 
676   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
677     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
678   }
679   b->compressedrow.use = PETSC_TRUE;
680   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
681   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
682 
683   if (baij->rowvalues) {
684     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
685     baij->rowvalues = 0;
686   }
687 
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
693 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
694 {
695   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
696   PetscErrorCode    ierr;
697   PetscInt          bs = mat->bs;
698   PetscMPIInt       size = baij->size,rank = baij->rank;
699   PetscTruth        iascii,isdraw;
700   PetscViewer       sviewer;
701   PetscViewerFormat format;
702 
703   PetscFunctionBegin;
704   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
705   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
706   if (iascii) {
707     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
708     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
709       MatInfo info;
710       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
711       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
712       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
713               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
714               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
715       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
716       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
717       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
718       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
719       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
720       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
721       PetscFunctionReturn(0);
722     } else if (format == PETSC_VIEWER_ASCII_INFO) {
723       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
724       PetscFunctionReturn(0);
725     }
726   }
727 
728   if (isdraw) {
729     PetscDraw       draw;
730     PetscTruth isnull;
731     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
732     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
733   }
734 
735   if (size == 1) {
736     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
737     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
738   } else {
739     /* assemble the entire matrix onto first processor. */
740     Mat         A;
741     Mat_SeqSBAIJ *Aloc;
742     Mat_SeqBAIJ *Bloc;
743     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
744     MatScalar   *a;
745 
746     /* Should this be the same type as mat? */
747     if (!rank) {
748       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
749     } else {
750       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
751     }
752     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
753     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
754     PetscLogObjectParent(mat,A);
755 
756     /* copy over the A part */
757     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
758     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
759     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
760 
761     for (i=0; i<mbs; i++) {
762       rvals[0] = bs*(baij->rstart + i);
763       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
764       for (j=ai[i]; j<ai[i+1]; j++) {
765         col = (baij->cstart+aj[j])*bs;
766         for (k=0; k<bs; k++) {
767           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
768           col++; a += bs;
769         }
770       }
771     }
772     /* copy over the B part */
773     Bloc = (Mat_SeqBAIJ*)baij->B->data;
774     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
775     for (i=0; i<mbs; i++) {
776       rvals[0] = bs*(baij->rstart + i);
777       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
778       for (j=ai[i]; j<ai[i+1]; j++) {
779         col = baij->garray[aj[j]]*bs;
780         for (k=0; k<bs; k++) {
781           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
782           col++; a += bs;
783         }
784       }
785     }
786     ierr = PetscFree(rvals);CHKERRQ(ierr);
787     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
788     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
789     /*
790        Everyone has to call to draw the matrix since the graphics waits are
791        synchronized across all processors that share the PetscDraw object
792     */
793     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
794     if (!rank) {
795       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
796       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
797     }
798     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
799     ierr = MatDestroy(A);CHKERRQ(ierr);
800   }
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatView_MPISBAIJ"
806 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
807 {
808   PetscErrorCode ierr;
809   PetscTruth     iascii,isdraw,issocket,isbinary;
810 
811   PetscFunctionBegin;
812   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
814   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
815   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
816   if (iascii || isdraw || issocket || isbinary) {
817     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
818   } else {
819     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatDestroy_MPISBAIJ"
826 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
827 {
828   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
829   PetscErrorCode ierr;
830 
831   PetscFunctionBegin;
832 #if defined(PETSC_USE_LOG)
833   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
834 #endif
835   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
836   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
837   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
838   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
839   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
840 #if defined (PETSC_USE_CTABLE)
841   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
842 #else
843   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
844 #endif
845   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
846   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
847   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
848   if (baij->slvec0) {
849     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
850     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
851   }
852   if (baij->slvec1) {
853     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
854     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
855     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
856   }
857   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
858   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
859   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
860   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
861 #if defined(PETSC_USE_MAT_SINGLE)
862   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
863 #endif
864   ierr = PetscFree(baij);CHKERRQ(ierr);
865 
866   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
867   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
868   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
869   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatMult_MPISBAIJ"
875 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
876 {
877   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
878   PetscErrorCode ierr;
879   PetscInt       nt,mbs=a->mbs,bs=A->bs;
880   PetscScalar    *x,*from,zero=0.0;
881 
882   PetscFunctionBegin;
883   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
884   if (nt != A->n) {
885     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
886   }
887   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
888   if (nt != A->m) {
889     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
890   }
891 
892   /* diagonal part */
893   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
894   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
895 
896   /* subdiagonal part */
897   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
898 
899   /* copy x into the vec slvec0 */
900   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
901   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
902   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
903   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
904 
905   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
906   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
908 
909   /* superdiagonal part */
910   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
911 
912   PetscFunctionReturn(0);
913 }
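/*
   Sketch of the one-scatter scheme used above (as read from the code): each
   process owns the block row [A B] of the symmetric matrix, with A the
   diagonal block (SBAIJ, upper triangle stored) and B the off-diagonal blocks.
   The routine computes slvec1a = A*x and slvec0b = B^T*x, packs
   slvec0 = [x ; B^T*x], and a single ADD_VALUES forward scatter
   slvec0 -> slvec1 both gathers the ghost values of x into slvec1b and
   accumulates the B^T*x contributions on their owning processes, so that the
   final multadd forms y = B*x_ghost + (A*x + received B^T*x terms).
*/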
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
917 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
918 {
919   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
920   PetscErrorCode ierr;
921   PetscInt       nt;
922 
923   PetscFunctionBegin;
924   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
925   if (nt != A->n) {
926     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
927   }
928   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
929   if (nt != A->m) {
930     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
931   }
932 
933   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
934   /* do diagonal part */
935   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
936   /* do superdiagonal part */
937   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
938   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
939   /* do subdiagonal part */
940   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
941   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
942   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
943 
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
949 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
950 {
951   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
952   PetscErrorCode ierr;
953   PetscInt       mbs=a->mbs,bs=A->bs;
954   PetscScalar    *x,*from,zero=0.0;
955 
956   PetscFunctionBegin;
957   /*
958   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
959   PetscSynchronizedFlush(A->comm);
960   */
961   /* diagonal part */
962   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
963   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
964 
965   /* subdiagonal part */
966   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
967 
968   /* copy x into the vec slvec0 */
969   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
970   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
971   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
972   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
973 
974   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
975   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
976   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
977 
978   /* superdiagonal part */
979   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
980 
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
986 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
987 {
988   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
993   /* do diagonal part */
994   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
995   /* do superdiagonal part */
996   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
997   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
998 
999   /* do subdiagonal part */
1000   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1001   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1003 
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1009 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1010 {
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
1014   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1020 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1021 {
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
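/*
   Because the matrix is symmetric (M^T = M), the two transpose operations
   above simply forward to MatMult() and MatMultAdd().
*/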
1028 
1029 /*
1030   This only works correctly for square matrices where the subblock A->A is the
1031    diagonal block
1032 */
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1035 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1036 {
1037   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1042   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatScale_MPISBAIJ"
1048 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1049 {
1050   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1055   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1061 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1062 {
1063   PetscFunctionBegin;
1064   SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) {
1076     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1077   }
1078   baij->getrowactive = PETSC_FALSE;
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1084 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1085 {
1086   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1087   PetscErrorCode ierr;
1088 
1089   PetscFunctionBegin;
1090   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1091   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1097 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1098 {
1099   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1100   Mat            A = a->A,B = a->B;
1101   PetscErrorCode ierr;
1102   PetscReal      isend[5],irecv[5];
1103 
1104   PetscFunctionBegin;
1105   info->block_size     = (PetscReal)matin->bs;
1106   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1107   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1108   isend[3] = info->memory;  isend[4] = info->mallocs;
1109   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1110   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1111   isend[3] += info->memory;  isend[4] += info->mallocs;
1112   if (flag == MAT_LOCAL) {
1113     info->nz_used      = isend[0];
1114     info->nz_allocated = isend[1];
1115     info->nz_unneeded  = isend[2];
1116     info->memory       = isend[3];
1117     info->mallocs      = isend[4];
1118   } else if (flag == MAT_GLOBAL_MAX) {
1119     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1120     info->nz_used      = irecv[0];
1121     info->nz_allocated = irecv[1];
1122     info->nz_unneeded  = irecv[2];
1123     info->memory       = irecv[3];
1124     info->mallocs      = irecv[4];
1125   } else if (flag == MAT_GLOBAL_SUM) {
1126     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1127     info->nz_used      = irecv[0];
1128     info->nz_allocated = irecv[1];
1129     info->nz_unneeded  = irecv[2];
1130     info->memory       = irecv[3];
1131     info->mallocs      = irecv[4];
1132   } else {
1133     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1134   }
1135   info->rows_global       = (PetscReal)matin->M;
1136   info->columns_global    = (PetscReal)matin->N;
1137   info->rows_local        = (PetscReal)matin->m;
1138   info->columns_local     = (PetscReal)matin->n;
1139   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1140   info->fill_ratio_needed = 0;
1141   info->factor_mallocs    = 0;
1142   PetscFunctionReturn(0);
1143 }
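/*
   Usage sketch for the routine above (hypothetical, via the public interface):
   gather global statistics over all processes after assembly.

     MatInfo info;
     ierr = MatGetInfo(mat,MAT_GLOBAL_SUM,&info);CHKERRQ(ierr);
     ierr = PetscPrintf(mat->comm,"nz_used %g  mallocs %g\n",info.nz_used,info.mallocs);CHKERRQ(ierr);
*/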
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1147 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1148 {
1149   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   switch (op) {
1154   case MAT_NO_NEW_NONZERO_LOCATIONS:
1155   case MAT_YES_NEW_NONZERO_LOCATIONS:
1156   case MAT_COLUMNS_UNSORTED:
1157   case MAT_COLUMNS_SORTED:
1158   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1159   case MAT_KEEP_ZEROED_ROWS:
1160   case MAT_NEW_NONZERO_LOCATION_ERR:
1161     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1162     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1163     break;
1164   case MAT_ROW_ORIENTED:
1165     a->roworiented = PETSC_TRUE;
1166     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1167     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1168     break;
1169   case MAT_ROWS_SORTED:
1170   case MAT_ROWS_UNSORTED:
1171   case MAT_YES_NEW_DIAGONALS:
1172     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1173     break;
1174   case MAT_COLUMN_ORIENTED:
1175     a->roworiented = PETSC_FALSE;
1176     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1177     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1178     break;
1179   case MAT_IGNORE_OFF_PROC_ENTRIES:
1180     a->donotstash = PETSC_TRUE;
1181     break;
1182   case MAT_NO_NEW_DIAGONALS:
1183     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1184   case MAT_USE_HASH_TABLE:
1185     a->ht_flag = PETSC_TRUE;
1186     break;
1187   case MAT_NOT_SYMMETRIC:
1188   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1189   case MAT_HERMITIAN:
1190     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1191   case MAT_SYMMETRIC:
1192   case MAT_STRUCTURALLY_SYMMETRIC:
1193   case MAT_NOT_HERMITIAN:
1194   case MAT_SYMMETRY_ETERNAL:
1195   case MAT_NOT_SYMMETRY_ETERNAL:
1196     break;
1197   default:
1198     SETERRQ(PETSC_ERR_SUP,"unknown option");
1199   }
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1205 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1206 {
1207   PetscErrorCode ierr;
1208   PetscFunctionBegin;
1209   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1215 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1216 {
1217   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1218   Mat            a=baij->A, b=baij->B;
1219   PetscErrorCode ierr;
1220   PetscInt       nv,m,n;
1221   PetscTruth     flg;
1222 
1223   PetscFunctionBegin;
1224   if (ll != rr){
1225     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1226     if (!flg)
1227       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1228   }
1229   if (!ll) PetscFunctionReturn(0);
1230 
1231   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1232   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1233 
1234   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1235   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1236 
1237   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1238 
1239   /* left diagonalscale the off-diagonal part */
1240   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1241 
1242   /* scale the diagonal part */
1243   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1244 
1245   /* right diagonalscale the off-diagonal part */
1246   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1247   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1248   PetscFunctionReturn(0);
1249 }
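/*
   Usage sketch for the routine above (hypothetical diagonal vector d): only
   the congruence scaling D*M*D preserves symmetry, so the same vector must be
   passed on both sides.

     ierr = MatDiagonalScale(mat,d,d);CHKERRQ(ierr);
*/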
1250 
1251 #undef __FUNCT__
1252 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1253 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1254 {
1255   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1256   MPI_Comm          comm = A->comm;
1257   static PetscTruth called = PETSC_FALSE;
1258   PetscErrorCode    ierr;
1259 
1260   PetscFunctionBegin;
1261   if (!a->rank) {
1262     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1263   }
1264   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1265   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1266   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1267   PetscFunctionReturn(0);
1268 }
1269 
1270 #undef __FUNCT__
1271 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1272 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1273 {
1274   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1275   PetscErrorCode ierr;
1276 
1277   PetscFunctionBegin;
1278   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1279   PetscFunctionReturn(0);
1280 }
1281 
1282 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1283 
1284 #undef __FUNCT__
1285 #define __FUNCT__ "MatEqual_MPISBAIJ"
1286 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1287 {
1288   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1289   Mat            a,b,c,d;
1290   PetscTruth     flg;
1291   PetscErrorCode ierr;
1292 
1293   PetscFunctionBegin;
1294   a = matA->A; b = matA->B;
1295   c = matB->A; d = matB->B;
1296 
1297   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1298   if (flg) {
1299     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1300   }
1301   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1302   PetscFunctionReturn(0);
1303 }
1304 
1305 #undef __FUNCT__
1306 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1307 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1308 {
1309   PetscErrorCode ierr;
1310 
1311   PetscFunctionBegin;
1312   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1313   PetscFunctionReturn(0);
1314 }
1315 
1316 #undef __FUNCT__
1317 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1318 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1319 {
1320   PetscErrorCode ierr;
1321   PetscInt       i;
1322   PetscTruth     flg;
1323 
1324   PetscFunctionBegin;
1325   for (i=0; i<n; i++) {
1326     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1327     if (!flg) {
1328       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1329     }
1330   }
1331   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1332   PetscFunctionReturn(0);
1333 }
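/*
   Usage sketch for the routine above (hypothetical index set is): only
   symmetric (principal) submatrices can be extracted, so the same index set
   must be passed for rows and columns.

     Mat *submat;
     ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,&submat);CHKERRQ(ierr);
*/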
1334 
1335 
1336 /* -------------------------------------------------------------------*/
1337 static struct _MatOps MatOps_Values = {
1338        MatSetValues_MPISBAIJ,
1339        MatGetRow_MPISBAIJ,
1340        MatRestoreRow_MPISBAIJ,
1341        MatMult_MPISBAIJ,
1342 /* 4*/ MatMultAdd_MPISBAIJ,
1343        MatMultTranspose_MPISBAIJ,
1344        MatMultTransposeAdd_MPISBAIJ,
1345        0,
1346        0,
1347        0,
1348 /*10*/ 0,
1349        0,
1350        0,
1351        MatRelax_MPISBAIJ,
1352        MatTranspose_MPISBAIJ,
1353 /*15*/ MatGetInfo_MPISBAIJ,
1354        MatEqual_MPISBAIJ,
1355        MatGetDiagonal_MPISBAIJ,
1356        MatDiagonalScale_MPISBAIJ,
1357        MatNorm_MPISBAIJ,
1358 /*20*/ MatAssemblyBegin_MPISBAIJ,
1359        MatAssemblyEnd_MPISBAIJ,
1360        0,
1361        MatSetOption_MPISBAIJ,
1362        MatZeroEntries_MPISBAIJ,
1363 /*25*/ 0,
1364        0,
1365        0,
1366        0,
1367        0,
1368 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1369        0,
1370        0,
1371        0,
1372        0,
1373 /*35*/ MatDuplicate_MPISBAIJ,
1374        0,
1375        0,
1376        0,
1377        0,
1378 /*40*/ 0,
1379        MatGetSubMatrices_MPISBAIJ,
1380        MatIncreaseOverlap_MPISBAIJ,
1381        MatGetValues_MPISBAIJ,
1382        0,
1383 /*45*/ MatPrintHelp_MPISBAIJ,
1384        MatScale_MPISBAIJ,
1385        0,
1386        0,
1387        0,
1388 /*50*/ 0,
1389        0,
1390        0,
1391        0,
1392        0,
1393 /*55*/ 0,
1394        0,
1395        MatSetUnfactored_MPISBAIJ,
1396        0,
1397        MatSetValuesBlocked_MPISBAIJ,
1398 /*60*/ 0,
1399        0,
1400        0,
1401        MatGetPetscMaps_Petsc,
1402        0,
1403 /*65*/ 0,
1404        0,
1405        0,
1406        0,
1407        0,
1408 /*70*/ MatGetRowMax_MPISBAIJ,
1409        0,
1410        0,
1411        0,
1412        0,
1413 /*75*/ 0,
1414        0,
1415        0,
1416        0,
1417        0,
1418 /*80*/ 0,
1419        0,
1420        0,
1421        0,
1422        MatLoad_MPISBAIJ,
1423 /*85*/ 0,
1424        0,
1425        0,
1426        0,
1427        0,
1428 /*90*/ 0,
1429        0,
1430        0,
1431        0,
1432        0,
1433 /*95*/ 0,
1434        0,
1435        0,
1436        0};
1437 
1438 
1439 EXTERN_C_BEGIN
1440 #undef __FUNCT__
1441 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1442 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1443 {
1444   PetscFunctionBegin;
1445   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1446   *iscopy = PETSC_FALSE;
1447   PetscFunctionReturn(0);
1448 }
1449 EXTERN_C_END
1450 
1451 EXTERN_C_BEGIN
1452 #undef __FUNCT__
1453 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1454 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1455 {
1456   Mat_MPISBAIJ   *b;
1457   PetscErrorCode ierr;
1458   PetscInt       i,mbs,Mbs;
1459 
1460   PetscFunctionBegin;
1461   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1462 
1463   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1464   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1465   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1466   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1467   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1468   if (d_nnz) {
1469     for (i=0; i<B->m/bs; i++) {
1470       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1471     }
1472   }
1473   if (o_nnz) {
1474     for (i=0; i<B->m/bs; i++) {
1475       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1476     }
1477   }
1478   B->preallocated = PETSC_TRUE;
1479   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1480   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1481   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1482   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1483 
1484   b   = (Mat_MPISBAIJ*)B->data;
1485   mbs = B->m/bs;
1486   Mbs = B->M/bs;
1487   if (mbs*bs != B->m) {
1488     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1489   }
1490 
1491   B->bs  = bs;
1492   b->bs2 = bs*bs;
1493   b->mbs = mbs;
1494   b->nbs = mbs;
1495   b->Mbs = Mbs;
1496   b->Nbs = Mbs;
1497 
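  /* gather every process's block-row count, then prefix-sum the counts so that
     block rows rowners[p] .. rowners[p+1]-1 are owned by process p */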
1498   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1499   b->rowners[0]    = 0;
1500   for (i=2; i<=b->size; i++) {
1501     b->rowners[i] += b->rowners[i-1];
1502   }
1503   b->rstart    = b->rowners[b->rank];
1504   b->rend      = b->rowners[b->rank+1];
1505   b->cstart    = b->rstart;
1506   b->cend      = b->rend;
1507   for (i=0; i<=b->size; i++) {
1508     b->rowners_bs[i] = b->rowners[i]*bs;
1509   }
1510   b->rstart_bs = b->rstart*bs;
1511   b->rend_bs   = b->rend*bs;
1512 
1513   b->cstart_bs = b->cstart*bs;
1514   b->cend_bs   = b->cend*bs;
1515 
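  /* the diagonal block b->A stores only the upper triangle, in SeqSBAIJ format;
     the off-diagonal block b->B is a general (nonsymmetric) SeqBAIJ matrix */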
1516   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1517   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1518   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1519   PetscLogObjectParent(B,b->A);
1520 
1521   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1522   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1523   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1524   PetscLogObjectParent(B,b->B);
1525 
1526   /* build cache (stash) for off-processor entries generated during assembly */
1527   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1528 
1529   PetscFunctionReturn(0);
1530 }
1531 EXTERN_C_END
1532 
1533 /*MC
1534    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1535    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1536 
1537    Options Database Keys:
1538 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1539 
1540   Level: beginner
1541 
1542 .seealso: MatCreateMPISBAIJ()
1543 M*/
1544 
1545 EXTERN_C_BEGIN
1546 #undef __FUNCT__
1547 #define __FUNCT__ "MatCreate_MPISBAIJ"
1548 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1549 {
1550   Mat_MPISBAIJ   *b;
1551   PetscErrorCode ierr;
1552   PetscTruth     flg;
1553 
1554   PetscFunctionBegin;
1555 
1556   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1557   B->data = (void*)b;
1558   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1559 
1560   B->ops->destroy    = MatDestroy_MPISBAIJ;
1561   B->ops->view       = MatView_MPISBAIJ;
1562   B->mapping    = 0;
1563   B->factor     = 0;
1564   B->assembled  = PETSC_FALSE;
1565 
1566   B->insertmode = NOT_SET_VALUES;
1567   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1568   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1569 
1570   /* build local table of row and column ownerships */
1571   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1572   b->cowners    = b->rowners + b->size + 2;
1573   b->rowners_bs = b->cowners + b->size + 2;
1574   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1575 
1576   /* build cache (stash) for off-processor entries generated during assembly */
1577   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1578   b->donotstash  = PETSC_FALSE;
1579   b->colmap      = PETSC_NULL;
1580   b->garray      = PETSC_NULL;
1581   b->roworiented = PETSC_TRUE;
1582 
1583 #if defined(PETSC_USE_MAT_SINGLE)
1584   /* stuff for MatSetValues_XXX in single precision */
1585   b->setvalueslen     = 0;
1586   b->setvaluescopy    = PETSC_NULL;
1587 #endif
1588 
1589   /* stuff used in block assembly */
1590   b->barray       = 0;
1591 
1592   /* stuff used for matrix vector multiply */
1593   b->lvec         = 0;
1594   b->Mvctx        = 0;
1595   b->slvec0       = 0;
1596   b->slvec0b      = 0;
1597   b->slvec1       = 0;
1598   b->slvec1a      = 0;
1599   b->slvec1b      = 0;
1600   b->sMvctx       = 0;
1601 
1602   /* stuff for MatGetRow() */
1603   b->rowindices   = 0;
1604   b->rowvalues    = 0;
1605   b->getrowactive = PETSC_FALSE;
1606 
1607   /* hash table stuff */
1608   b->ht           = 0;
1609   b->hd           = 0;
1610   b->ht_size      = 0;
1611   b->ht_flag      = PETSC_FALSE;
1612   b->ht_fact      = 0;
1613   b->ht_total_ct  = 0;
1614   b->ht_insert_ct = 0;
1615 
1616   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
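  /* optionally switch to hash-table-based assembly; the factor (must be > 1.0,
     default 1.39) scales the table size relative to the expected number of nonzeros */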
1617   if (flg) {
1618     PetscReal fact = 1.39;
1619     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1620     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1621     if (fact <= 1.0) fact = 1.39;
1622     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1623     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1624   }
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1626                                      "MatStoreValues_MPISBAIJ",
1627                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1629                                      "MatRetrieveValues_MPISBAIJ",
1630                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1631   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1632                                      "MatGetDiagonalBlock_MPISBAIJ",
1633                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1634   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1635                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1636                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1637   B->symmetric                  = PETSC_TRUE;
1638   B->structurally_symmetric     = PETSC_TRUE;
1639   B->symmetric_set              = PETSC_TRUE;
1640   B->structurally_symmetric_set = PETSC_TRUE;
1641   PetscFunctionReturn(0);
1642 }
1643 EXTERN_C_END
1644 
1645 /*MC
1646    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1647 
1648    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1649    and MATMPISBAIJ otherwise.
1650 
1651    Options Database Keys:
1652 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1653 
1654   Level: beginner
1655 
1656 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
1657 M*/
1658 
1659 EXTERN_C_BEGIN
1660 #undef __FUNCT__
1661 #define __FUNCT__ "MatCreate_SBAIJ"
1662 PetscErrorCode MatCreate_SBAIJ(Mat A)
1663 {
1664   PetscErrorCode ierr;
1665   PetscMPIInt    size;
1666 
1667   PetscFunctionBegin;
1668   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1669   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1670   if (size == 1) {
1671     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1672   } else {
1673     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1674   }
1675   PetscFunctionReturn(0);
1676 }
1677 EXTERN_C_END
1678 
1679 #undef __FUNCT__
1680 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1681 /*@C
1682    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1683    the user should preallocate the matrix storage by setting the parameters
1684    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1685    performance can be increased by more than a factor of 50.
1686 
1687    Collective on Mat
1688 
1689    Input Parameters:
1690 +  A - the matrix
1691 .  bs   - size of block
1692 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1693            submatrix  (same for all local rows)
1694 .  d_nnz - array containing the number of block nonzeros in the various block rows
1695            in the upper triangular and diagonal part of the diagonal portion of the local
1696            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1697            for the diagonal entry even if it is zero.
1698 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1699            submatrix (same for all local rows).
1700 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1701            off-diagonal portion of the local submatrix (possibly different for
1702            each block row) or PETSC_NULL.
1703 
1704 
1705    Options Database Keys:
1706 +   -mat_no_unroll - uses code that does not unroll the loops in the
1707                      block calculations (much slower)
1708 -   -mat_block_size - size of the blocks to use
1709 
1710    Notes:
1711 
1712    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1713    then it must be used on all processors that share the object for that argument.
1714 
1715    If the *_nnz parameter is given then the *_nz parameter is ignored.
1716 
1717    Storage Information:
1718    For a square global matrix we define each processor's diagonal portion
1719    to be its local rows and the corresponding columns (a square submatrix);
1720    each processor's off-diagonal portion encompasses the remainder of the
1721    local matrix (a rectangular submatrix).
1722 
1723    The user can specify preallocated storage for the diagonal part of
1724    the local submatrix with either d_nz or d_nnz (not both).  Set
1725    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1726    memory allocation.  Likewise, specify preallocated storage for the
1727    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1728 
1729    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1730    the figure below we depict these three local rows and all columns (0-11).
1731 
1732 .vb
1733            0 1 2 3 4 5 6 7 8 9 10 11
1734           -------------------
1735    row 3  |  o o o d d d o o o o o o
1736    row 4  |  o o o d d d o o o o o o
1737    row 5  |  o o o d d d o o o o o o
1738           -------------------
1739 .ve
1740 
1741    Thus, any entries in the d locations are stored in the d (diagonal)
1742    submatrix, and any entries in the o locations are stored in the
1743    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1744    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1745 
1746    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1747    plus the diagonal part of the d matrix,
1748    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1749    In general, for PDE problems in which most nonzeros are near the diagonal,
1750    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1751    or you will get TERRIBLE performance; see the users' manual chapter on
1752    matrices.
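
   Example:
   A minimal sketch of typical use (comm, m, M, bs, d_nz and o_nz below are
   illustrative placeholders, not part of this interface):
.vb
   Mat A;
   ierr = MatCreate(comm,m,m,M,M,&A);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
   ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
.ve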
1753 
1754    Level: intermediate
1755 
1756 .keywords: matrix, block, aij, compressed row, sparse, parallel
1757 
1758 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1759 @*/
1760 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1761 {
1762   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1763 
1764   PetscFunctionBegin;
1765   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1766   if (f) {
1767     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1768   }
1769   PetscFunctionReturn(0);
1770 }
1771 
1772 #undef __FUNCT__
1773 #define __FUNCT__ "MatCreateMPISBAIJ"
1774 /*@C
1775    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1776    (block compressed row).  For good matrix assembly performance
1777    the user should preallocate the matrix storage by setting the parameters
1778    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1779    performance can be increased by more than a factor of 50.
1780 
1781    Collective on MPI_Comm
1782 
1783    Input Parameters:
1784 +  comm - MPI communicator
1785 .  bs   - size of block
1786 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1787            This value should be the same as the local size used in creating the
1788            y vector for the matrix-vector product y = Ax.
1789 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1790            This value should be the same as the local size used in creating the
1791            x vector for the matrix-vector product y = Ax.
1792 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1793 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1794 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1795            submatrix  (same for all local rows)
1796 .  d_nnz - array containing the number of block nonzeros in the various block rows
1797            in the upper triangular and diagonal part of the diagonal portion of the local
1798            submatrix (possibly different for each block row) or PETSC_NULL.
1799            You must leave room for the diagonal entry even if it is zero.
1800 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1801            submatrix (same for all local rows).
1802 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1803            off-diagonal portion of the local submatrix (possibly different for
1804            each block row) or PETSC_NULL.
1805 
1806    Output Parameter:
1807 .  A - the matrix
1808 
1809    Options Database Keys:
1810 +   -mat_no_unroll - uses code that does not unroll the loops in the
1811                      block calculations (much slower)
1812 .   -mat_block_size - size of the blocks to use
1813 -   -mat_mpi - use the parallel matrix data structures even on one processor
1814                (defaults to using SeqSBAIJ format on one processor)
1815 
1816    Notes:
1817    The user MUST specify either the local or global matrix dimensions
1818    (possibly both).
1819 
1820    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1821    then it must be used on all processors that share the object for that argument.
1822 
1823    If the *_nnz parameter is given then the *_nz parameter is ignored.
1824 
1825    Storage Information:
1826    For a square global matrix we define each processor's diagonal portion
1827    to be its local rows and the corresponding columns (a square submatrix);
1828    each processor's off-diagonal portion encompasses the remainder of the
1829    local matrix (a rectangular submatrix).
1830 
1831    The user can specify preallocated storage for the diagonal part of
1832    the local submatrix with either d_nz or d_nnz (not both).  Set
1833    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1834    memory allocation.  Likewise, specify preallocated storage for the
1835    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1836 
1837    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1838    the figure below we depict these three local rows and all columns (0-11).
1839 
1840 .vb
1841            0 1 2 3 4 5 6 7 8 9 10 11
1842           -------------------
1843    row 3  |  o o o d d d o o o o o o
1844    row 4  |  o o o d d d o o o o o o
1845    row 5  |  o o o d d d o o o o o o
1846           -------------------
1847 .ve
1848 
1849    Thus, any entries in the d locations are stored in the d (diagonal)
1850    submatrix, and any entries in the o locations are stored in the
1851    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1852    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1853 
1854    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1855    plus the diagonal part of the d matrix,
1856    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1857    In general, for PDE problems in which most nonzeros are near the diagonal,
1858    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1859    or you will get TERRIBLE performance; see the users' manual chapter on
1860    matrices.
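
   Example:
   A minimal sketch (comm, bs, mloc, Mglob, d_nz and o_nz are illustrative
   placeholders, not part of this interface):
.vb
   Mat A;
   ierr = MatCreateMPISBAIJ(comm,bs,mloc,mloc,Mglob,Mglob,d_nz,PETSC_NULL,o_nz,PETSC_NULL,&A);CHKERRQ(ierr);
.ve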
1861 
1862    Level: intermediate
1863 
1864 .keywords: matrix, block, aij, compressed row, sparse, parallel
1865 
1866 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1867 @*/
1868 
1869 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1870 {
1871   PetscErrorCode ierr;
1872   PetscMPIInt    size;
1873 
1874   PetscFunctionBegin;
1875   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1876   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1877   if (size > 1) {
1878     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1879     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1880   } else {
1881     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1882     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1883   }
1884   PetscFunctionReturn(0);
1885 }
1886 
1887 
1888 #undef __FUNCT__
1889 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1890 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1891 {
1892   Mat            mat;
1893   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1894   PetscErrorCode ierr;
1895   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1896   PetscScalar    *array;
1897 
1898   PetscFunctionBegin;
1899   *newmat       = 0;
1900   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1901   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1902   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1903 
1904   mat->factor       = matin->factor;
1905   mat->preallocated = PETSC_TRUE;
1906   mat->assembled    = PETSC_TRUE;
1907   mat->insertmode   = NOT_SET_VALUES;
1908 
1909   a = (Mat_MPISBAIJ*)mat->data;
1910   mat->bs  = matin->bs;
1911   a->bs2   = oldmat->bs2;
1912   a->mbs   = oldmat->mbs;
1913   a->nbs   = oldmat->nbs;
1914   a->Mbs   = oldmat->Mbs;
1915   a->Nbs   = oldmat->Nbs;
1916 
1917   a->rstart       = oldmat->rstart;
1918   a->rend         = oldmat->rend;
1919   a->cstart       = oldmat->cstart;
1920   a->cend         = oldmat->cend;
1921   a->size         = oldmat->size;
1922   a->rank         = oldmat->rank;
1923   a->donotstash   = oldmat->donotstash;
1924   a->roworiented  = oldmat->roworiented;
1925   a->rowindices   = 0;
1926   a->rowvalues    = 0;
1927   a->getrowactive = PETSC_FALSE;
1928   a->barray       = 0;
1929   a->rstart_bs    = oldmat->rstart_bs;
1930   a->rend_bs      = oldmat->rend_bs;
1931   a->cstart_bs    = oldmat->cstart_bs;
1932   a->cend_bs      = oldmat->cend_bs;
1933 
1934   /* hash table stuff */
1935   a->ht           = 0;
1936   a->hd           = 0;
1937   a->ht_size      = 0;
1938   a->ht_flag      = oldmat->ht_flag;
1939   a->ht_fact      = oldmat->ht_fact;
1940   a->ht_total_ct  = 0;
1941   a->ht_insert_ct = 0;
1942 
1943   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1944   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1945   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1946   if (oldmat->colmap) {
1947 #if defined (PETSC_USE_CTABLE)
1948     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1949 #else
1950     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1951     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
1952     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1953 #endif
1954   } else a->colmap = 0;
1955 
1956   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1957     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1958     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1959     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1960   } else a->garray = 0;
1961 
1962   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1963   PetscLogObjectParent(mat,a->lvec);
1964   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1965   PetscLogObjectParent(mat,a->Mvctx);
1966 
1967   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1968   PetscLogObjectParent(mat,a->slvec0);
1969   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1970   PetscLogObjectParent(mat,a->slvec1);
1971 
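  /* slvec1a/slvec1b (and slvec0b below) are not separate storage: VecCreateSeqWithArray
     makes them windows onto slvec1 and slvec0, split at the bs*mbs boundary between
     the local part and the ghost part */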
1972   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1973   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1974   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1975   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1976   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1977   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1978   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1979   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1982   PetscLogObjectParent(mat,a->slvec0b);
1983   PetscLogObjectParent(mat,a->slvec1a);
1984   PetscLogObjectParent(mat,a->slvec1b);
1985 
1986   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
1987   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1988   a->sMvctx = oldmat->sMvctx;
1989   PetscLogObjectParent(mat,a->sMvctx);
1990 
1991   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1992   PetscLogObjectParent(mat,a->A);
1993   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1994   PetscLogObjectParent(mat,a->B);
1995   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1996   *newmat = mat;
1997   PetscFunctionReturn(0);
1998 }
1999 
2000 #include "petscsys.h"
2001 
2002 #undef __FUNCT__
2003 #define __FUNCT__ "MatLoad_MPISBAIJ"
2004 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2005 {
2006   Mat            A;
2007   PetscErrorCode ierr;
2008   PetscInt       i,nz,j,rstart,rend;
2009   PetscScalar    *vals,*buf;
2010   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2011   MPI_Status     status;
2012   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2013   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2014   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2015   PetscInt       bs=1,Mbs,mbs,extra_rows;
2016   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2017   PetscInt       dcount,kmax,k,nzcount,tmp;
2018   int            fd;
2019 
2020   PetscFunctionBegin;
2021   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2022 
2023   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2024   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2025   if (!rank) {
2026     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2027     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2028     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2029     if (header[3] < 0) {
2030       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2031     }
2032   }
2033 
2034   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2035   M = header[1]; N = header[2];
2036 
2037   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2038 
2039   /*
2040      This code adds extra rows to make sure the number of rows is
2041      divisible by the blocksize
2042   */
2043   Mbs        = M/bs;
2044   extra_rows = bs - M + bs*(Mbs);
2045   if (extra_rows == bs) extra_rows = 0;
2046   else                  Mbs++;
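  /* e.g. M=10, bs=3: Mbs=10/3=3 and extra_rows=3-10+9=2, so Mbs becomes 4 and the
     2 padding rows are later given unit diagonal entries */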
2047   if (extra_rows && !rank) {
2048     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2049   }
2050 
2051   /* determine ownership of all rows */
2052   mbs        = Mbs/size + ((Mbs % size) > rank);
2053   m          = mbs*bs;
2054   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2055   browners   = rowners + size + 1;
2056   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2057   rowners[0] = 0;
2058   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2059   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2060   rstart = rowners[rank];
2061   rend   = rowners[rank+1];
2062 
2063   /* distribute row lengths to all processors */
2064   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2065   if (!rank) {
2066     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2067     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2068     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2069     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2070     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2071     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2072     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2073   } else {
2074     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2075   }
2076 
2077   if (!rank) {   /* procs[0] */
2078     /* calculate the number of nonzeros on each processor */
2079     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2080     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2081     for (i=0; i<size; i++) {
2082       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2083         procsnz[i] += rowlengths[j];
2084       }
2085     }
2086     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2087 
2088     /* determine max buffer needed and allocate it */
2089     maxnz = 0;
2090     for (i=0; i<size; i++) {
2091       maxnz = PetscMax(maxnz,procsnz[i]);
2092     }
2093     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2094 
2095     /* read in my part of the matrix column indices  */
2096     nz     = procsnz[0];
2097     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2098     mycols = ibuf;
2099     if (size == 1)  nz -= extra_rows;
2100     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2101     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2102 
2103     /* read in the column indices for every other process (except the last) and ship them off */
2104     for (i=1; i<size-1; i++) {
2105       nz   = procsnz[i];
2106       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2107       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2108     }
2109     /* read in the column indices for the last process */
2110     if (size != 1) {
2111       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2112       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2113       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2114       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2115     }
2116     ierr = PetscFree(cols);CHKERRQ(ierr);
2117   } else {  /* procs[i], i>0 */
2118     /* determine buffer space needed for message */
2119     nz = 0;
2120     for (i=0; i<m; i++) {
2121       nz += locrowlens[i];
2122     }
2123     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2124     mycols = ibuf;
2125     /* receive message of column indices*/
2126     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2127     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2128     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2129   }
2130 
2131   /* loop over local rows, determining number of off diagonal entries */
2132   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2133   odlens   = dlens + (rend-rstart);
2134   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2135   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2136   masked1  = mask    + Mbs;
2137   masked2  = masked1 + Mbs;
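  /* mask[] flags the block columns already counted in the current block row;
     masked1[]/masked2[] record which diagonal/off-diagonal flags were set so
     they can be cleared cheaply after each block row */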
2138   rowcount = 0; nzcount = 0;
2139   for (i=0; i<mbs; i++) {
2140     dcount  = 0;
2141     odcount = 0;
2142     for (j=0; j<bs; j++) {
2143       kmax = locrowlens[rowcount];
2144       for (k=0; k<kmax; k++) {
2145         tmp = mycols[nzcount++]/bs; /* block col. index */
2146         if (!mask[tmp]) {
2147           mask[tmp] = 1;
2148           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2149           else masked1[dcount++] = tmp; /* entry in diag portion */
2150         }
2151       }
2152       rowcount++;
2153     }
2154 
2155     dlens[i]  = dcount;  /* d_nzz[i] */
2156     odlens[i] = odcount; /* o_nzz[i] */
2157 
2158     /* zero out the mask elements we set */
2159     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2160     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2161   }
2162 
2163   /* create our matrix */
2164   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2165   ierr = MatSetType(A,type);CHKERRQ(ierr);
2166   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2167   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2168 
2169   if (!rank) {
2170     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2171     /* read in my part of the matrix numerical values  */
2172     nz = procsnz[0];
2173     vals = buf;
2174     mycols = ibuf;
2175     if (size == 1)  nz -= extra_rows;
2176     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2177     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2178 
2179     /* insert into matrix */
2180     jj      = rstart*bs;
2181     for (i=0; i<m; i++) {
2182       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2183       mycols += locrowlens[i];
2184       vals   += locrowlens[i];
2185       jj++;
2186     }
2187 
2188     /* read in the values for the other processes (except the last one) and ship them out */
2189     for (i=1; i<size-1; i++) {
2190       nz   = procsnz[i];
2191       vals = buf;
2192       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2193       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2194     }
2195     /* the last proc */
2196     if (size != 1){
2197       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 here; the extra rows are not on the disk */
2198       vals = buf;
2199       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2200       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2201       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2202     }
2203     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2204 
2205   } else {
2206     /* receive numeric values */
2207     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2208 
2209     /* receive message of values*/
2210     vals   = buf;
2211     mycols = ibuf;
2212     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2213     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2214     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2215 
2216     /* insert into matrix */
2217     jj      = rstart*bs;
2218     for (i=0; i<m; i++) {
2219       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2220       mycols += locrowlens[i];
2221       vals   += locrowlens[i];
2222       jj++;
2223     }
2224   }
2225 
2226   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2227   ierr = PetscFree(buf);CHKERRQ(ierr);
2228   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2229   ierr = PetscFree(rowners);CHKERRQ(ierr);
2230   ierr = PetscFree(dlens);CHKERRQ(ierr);
2231   ierr = PetscFree(mask);CHKERRQ(ierr);
2232   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2233   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2234   *newmat = A;
2235   PetscFunctionReturn(0);
2236 }
2237 
2238 #undef __FUNCT__
2239 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2240 /*XXXXX@
2241    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the hash table.
2242 
2243    Input Parameters:
2244 .  mat  - the matrix
2245 .  fact - factor
2246 
2247    Collective on Mat
2248 
2249    Level: advanced
2250 
2251   Notes:
2252    This can also be set by the command line option: -mat_use_hash_table fact
2253 
2254 .keywords: matrix, hashtable, factor, HT
2255 
2256 .seealso: MatSetOption()
2257 @XXXXX*/
2258 
2259 
2260 #undef __FUNCT__
2261 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2262 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2263 {
2264   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2265   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2266   PetscReal      atmp;
2267   PetscReal      *work,*svalues,*rvalues;
2268   PetscErrorCode ierr;
2269   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2270   PetscMPIInt    rank,size;
2271   PetscInt       *rowners_bs,dest,count,source;
2272   PetscScalar    *va;
2273   MatScalar      *ba;
2274   MPI_Status     stat;
2275 
2276   PetscFunctionBegin;
2277   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2278   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
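  /* va now holds the row maxima of the diagonal block A; every stored entry of the
     off-diagonal block B lies in the strictly upper triangular part, so by symmetry
     it also contributes to the maximum of a row owned by a later process: those
     contributions are accumulated in work[] below and shipped to their owners */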
2279 
2280   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2281   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2282 
2283   bs   = A->bs;
2284   mbs  = a->mbs;
2285   Mbs  = a->Mbs;
2286   ba   = b->a;
2287   bi   = b->i;
2288   bj   = b->j;
2289 
2290   /* find ownerships */
2291   rowners_bs = a->rowners_bs;
2292 
2293   /* each proc creates an array to be distributed */
2294   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2295   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2296 
2297   /* row_max for B */
2298   if (rank != size-1){
2299     for (i=0; i<mbs; i++) {
2300       ncols = bi[1] - bi[0]; bi++;
2301       brow  = bs*i;
2302       for (j=0; j<ncols; j++){
2303         bcol = bs*(*bj);
2304         for (kcol=0; kcol<bs; kcol++){
2305           col = bcol + kcol;                 /* local col index */
2306           col += rowners_bs[rank+1];      /* global col index */
2307           for (krow=0; krow<bs; krow++){
2308             atmp = PetscAbsScalar(*ba); ba++;
2309             row = brow + krow;    /* local row index */
2310             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2311             if (work[col] < atmp) work[col] = atmp;
2312           }
2313         }
2314         bj++;
2315       }
2316     }
2317 
2318     /* send values to its owners */
2319     for (dest=rank+1; dest<size; dest++){
2320       svalues = work + rowners_bs[dest];
2321       count   = rowners_bs[dest+1]-rowners_bs[dest];
2322       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2323     }
2324   }
2325 
2326   /* receive values */
2327   if (rank){
2328     rvalues = work;
2329     count   = rowners_bs[rank+1]-rowners_bs[rank];
2330     for (source=0; source<rank; source++){
2331       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2332       /* process values */
2333       for (i=0; i<count; i++){
2334         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2335       }
2336     }
2337   }
2338 
2339   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2340   ierr = PetscFree(work);CHKERRQ(ierr);
2341   PetscFunctionReturn(0);
2342 }
2343 
2344 #undef __FUNCT__
2345 #define __FUNCT__ "MatRelax_MPISBAIJ"
2346 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2347 {
2348   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2349   PetscErrorCode ierr;
2350   PetscInt       mbs=mat->mbs,bs=matin->bs;
2351   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2352   Vec            bb1;
2353 
2354   PetscFunctionBegin;
2355   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2356   if (bs > 1)
2357     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2358 
2359   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2360     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2361       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2362       its--;
2363     }
2364 
2365     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2366     while (its--){
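      /* one sweep: form bb1 = bb - B*x - B^T*x using a single forward scatter of
         slvec0 = -[x; B^T*x] into slvec1 = [bb; 0], then apply a symmetric local
         relaxation with the diagonal block */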
2367 
2368       /* lower triangular part: slvec0b = - B^T*xx */
2369       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2370 
2371       /* copy xx into slvec0a */
2372       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2373       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2374       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2375       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2376 
2377       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2378 
2379       /* copy bb into slvec1a */
2380       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2381       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2382       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2383       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2384 
2385       /* set slvec1b = 0 */
2386       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2387 
2388       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2389       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2390       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2391       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2392 
2393       /* upper triangular part: bb1 = bb1 - B*x */
2394       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2395 
2396       /* local diagonal sweep */
2397       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2398     }
2399     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2400   } else {
2401     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2402   }
2403   PetscFunctionReturn(0);
2404 }
2405 
2406 #undef __FUNCT__
2407 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2408 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2409 {
2410   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2411   PetscErrorCode ierr;
2412   PetscScalar    mone=-1.0;
2413   Vec            lvec1,bb1;
2414 
2415   PetscFunctionBegin;
2416   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2417   if (matin->bs > 1)
2418     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2419 
2420   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2421     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2422       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2423       its--;
2424     }
2425 
2426     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2427     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2428     while (its--){
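      /* same symmetric sweep as MatRelax_MPISBAIJ above, but built from two scatters
         through lvec (forward for the B*x term, reverse to accumulate -B^T*x into bb1)
         instead of the combined slvec0/slvec1 scatter */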
2429       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2430 
2431       /* lower diagonal part: bb1 = bb - B^T*xx */
2432       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2433       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2434 
2435       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2436       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2437       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2438 
2439       /* upper diagonal part: bb1 = bb1 - B*x */
2440       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2441       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2442 
2443       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2444 
2445       /* diagonal sweep */
2446       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2447     }
2448     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2449     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2450   } else {
2451     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2452   }
2453   PetscFunctionReturn(0);
2454 }
2455 
2456