xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision ee45ca4afdde1af4d1deda7cd4dc1a4a63a3ea97)
1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
25    not exist. Otherwise ..._MatScalar() takes matrix elements already in single precision (MatScalar) and
26    inserts them directly into the single-precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
27    converts the entries from PetscScalar into single precision and then calls ..._MatScalar() to put them
28    into the single-precision data structures.
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
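/*
   For orientation only (a sketch, not part of the library logic): in the common
   double-precision build, where MatScalar == PetscScalar, the defines above make a
   call such as

     ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr);

   expand directly to MatSetValues_MPISBAIJ(); the extra copy of the values into a
   single-precision (MatScalar) buffer only happens when PETSC_USE_MAT_SINGLE is defined.
*/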
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (PetscInt*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (PetscInt*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar))); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
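/*
   The two _Private macros above share one search/insert scheme per block row: a coarse
   binary search over the sorted block-column indices followed by a short linear scan,
   then (if the block is new and new nonzeros are allowed) a shift-and-insert, enlarging
   the row storage by CHUNKSIZE blocks when it is full.  A simplified, illustrative
   sketch of the search step only (the helper name is hypothetical and is not used
   elsewhere in this file):

     static PetscInt FindBlockColumn(const PetscInt *rp,PetscInt nrow,PetscInt bcol)
     {
       PetscInt low = 0,high = nrow,t,i;
       while (high - low > 3) {            coarse binary search
         t = (low + high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (i=low; i<high; i++) {          finish with a short linear scan
         if (rp[i] >= bcol) break;         rp[i]==bcol: found; rp[i]>bcol: insertion point
       }
       return i;
     }
*/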
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #endif
279 
280 /* Only add/insert a(i,j) with i<=j (blocks).
281    Any a(i,j) with i>j input by the user is ignored.
282 */
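/*
   Illustrative sketch (not part of this file): with the MATMPISBAIJ format a caller
   passes global indices as usual, but entries whose block row exceeds their block
   column are silently dropped.  With block size 1 for simplicity:

     PetscScalar v = 5.0;
     PetscInt    i = 3, j = 7;
     ierr = MatSetValues(A,1,&i,1,&j,&v,INSERT_VALUES);CHKERRQ(ierr);   kept    (upper triangle)
     ierr = MatSetValues(A,1,&j,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);   ignored (lower triangle)

   so only the upper triangle (including the diagonal) needs to be supplied.
*/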
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
285 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
288   MatScalar      value;
289   PetscTruth     roworiented = baij->roworiented;
290   PetscErrorCode ierr;
291   PetscInt       i,j,row,col;
292   PetscInt       rstart_orig=baij->rstart_bs;
293   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
294   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
295 
296   /* Some variables required in the macros */
297   Mat            A = baij->A;
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
299   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
300   MatScalar      *aa=a->a;
301 
302   Mat            B = baij->B;
303   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
304   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
305   MatScalar     *ba=b->a;
306 
307   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
308   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
309   MatScalar     *ap,*bap;
310 
311   /* for stash */
312   PetscInt      n_loc, *in_loc=0;
313   MatScalar     *v_loc=0;
314 
315   PetscFunctionBegin;
316 
317   if(!baij->donotstash){
318     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
319     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
320   }
321 
322   for (i=0; i<m; i++) {
323     if (im[i] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
326 #endif
327     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
328       row = im[i] - rstart_orig;              /* local row index */
329       for (j=0; j<n; j++) {
330         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
331         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
332           col = in[j] - cstart_orig;          /* local col index */
333           brow = row/bs; bcol = col/bs;
334           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
335           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
336           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
337           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
338         } else if (in[j] < 0) continue;
339 #if defined(PETSC_USE_DEBUG)
340         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
341 #endif
342         else {  /* off-diag entry (B) */
343           if (mat->was_assembled) {
344             if (!baij->colmap) {
345               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
346             }
347 #if defined (PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
349             col  = col - 1;
350 #else
351             col = baij->colmap[in[j]/bs] - 1;
352 #endif
353             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
354               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
355               col =  in[j];
356               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
357               B = baij->B;
358               b = (Mat_SeqBAIJ*)(B)->data;
359               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
360               ba=b->a;
361             } else col += in[j]%bs;
362           } else col = in[j];
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         }
367       }
368     } else {  /* off processor entry */
369       if (!baij->donotstash) {
370         n_loc = 0;
371         for (j=0; j<n; j++){
372           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
373           in_loc[n_loc] = in[j];
374           if (roworiented) {
375             v_loc[n_loc] = v[i*n+j];
376           } else {
377             v_loc[n_loc] = v[j*m+i];
378           }
379           n_loc++;
380         }
381         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
382       }
383     }
384   }
385 
386   if(!baij->donotstash){
387     ierr = PetscFree(in_loc);CHKERRQ(ierr);
388     ierr = PetscFree(v_loc);CHKERRQ(ierr);
389   }
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
395 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
396 {
397   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
398   const MatScalar *value;
399   MatScalar       *barray=baij->barray;
400   PetscTruth      roworiented = baij->roworiented;
401   PetscErrorCode  ierr;
402   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
403   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
404   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
405 
406   PetscFunctionBegin;
407   if(!barray) {
408     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
409     baij->barray = barray;
410   }
411 
412   if (roworiented) {
413     stepval = (n-1)*bs;
414   } else {
415     stepval = (m-1)*bs;
416   }
417   for (i=0; i<m; i++) {
418     if (im[i] < 0) continue;
419 #if defined(PETSC_USE_DEBUG)
420     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
421 #endif
422     if (im[i] >= rstart && im[i] < rend) {
423       row = im[i] - rstart;
424       for (j=0; j<n; j++) {
425         /* If n == 1 (row-oriented) or m == 1 (column-oriented), a copy is not required */
426         if ((roworiented) && (n == 1)) {
427           barray = (MatScalar*) v + i*bs2;
428         } else if((!roworiented) && (m == 1)) {
429           barray = (MatScalar*) v + j*bs2;
430         } else { /* Here a copy is required */
431           if (roworiented) {
432             value = v + i*(stepval+bs)*bs + j*bs;
433           } else {
434             value = v + j*(stepval+bs)*bs + i*bs;
435           }
436           for (ii=0; ii<bs; ii++,value+=stepval) {
437             for (jj=0; jj<bs; jj++) {
438               *barray++  = *value++;
439             }
440           }
441           barray -=bs2;
442         }
443 
444         if (in[j] >= cstart && in[j] < cend){
445           col  = in[j] - cstart;
446           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
447         }
448         else if (in[j] < 0) continue;
449 #if defined(PETSC_USE_DEBUG)
450         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
451 #endif
452         else {
453           if (mat->was_assembled) {
454             if (!baij->colmap) {
455               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
456             }
457 
458 #if defined(PETSC_USE_DEBUG)
459 #if defined (PETSC_USE_CTABLE)
460             { PetscInt data;
461               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
462               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463             }
464 #else
465             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
466 #endif
467 #endif
468 #if defined (PETSC_USE_CTABLE)
469 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
470             col  = (col - 1)/bs;
471 #else
472             col = (baij->colmap[in[j]] - 1)/bs;
473 #endif
474             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
475               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
476               col =  in[j];
477             }
478           }
479           else col = in[j];
480           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482       }
483     } else {
484       if (!baij->donotstash) {
485         if (roworiented) {
486           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487         } else {
488           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatGetValues_MPISBAIJ"
498 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
499 {
500   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
501   PetscErrorCode ierr;
502   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
503   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
504 
505   PetscFunctionBegin;
506   for (i=0; i<m; i++) {
507     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
508     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
509     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
510       row = idxm[i] - bsrstart;
511       for (j=0; j<n; j++) {
512         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
513         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
514         if (idxn[j] >= bscstart && idxn[j] < bscend){
515           col = idxn[j] - bscstart;
516           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
517         } else {
518           if (!baij->colmap) {
519             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
520           }
521 #if defined (PETSC_USE_CTABLE)
522           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
523           data --;
524 #else
525           data = baij->colmap[idxn[j]/bs]-1;
526 #endif
527           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
528           else {
529             col  = data + idxn[j]%bs;
530             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
531           }
532         }
533       }
534     } else {
535       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
536     }
537   }
538  PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatNorm_MPISBAIJ"
543 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
544 {
545   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
546   PetscErrorCode ierr;
547   PetscReal      sum[2],*lnorm2;
548 
549   PetscFunctionBegin;
550   if (baij->size == 1) {
551     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
552   } else {
553     if (type == NORM_FROBENIUS) {
554       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
555       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
556       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
557       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
558       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
559       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
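      /* the off-diagonal blocks are stored only once (in B) but occur twice in the
         symmetric matrix, hence the factor of 2 on sum[1] */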
560       *norm = sqrt(sum[0] + 2*sum[1]);
561       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
562     } else {
563       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
564     }
565   }
566   PetscFunctionReturn(0);
567 }
568 
569 #undef __FUNCT__
570 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
571 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
572 {
573   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
574   PetscErrorCode ierr;
575   PetscInt       nstash,reallocs;
576   InsertMode     addv;
577 
578   PetscFunctionBegin;
579   if (baij->donotstash) {
580     PetscFunctionReturn(0);
581   }
582 
583   /* make sure all processors are either in INSERTMODE or ADDMODE */
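  /* the bitwise OR detects a mix of modes: INSERT_VALUES and ADD_VALUES occupy
     different bits, so the reduced value equals (ADD_VALUES|INSERT_VALUES) only
     if some process inserted while another added */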
584   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
585   if (addv == (ADD_VALUES|INSERT_VALUES)) {
586     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
587   }
588   mat->insertmode = addv; /* in case this processor had no cache */
589 
590   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
591   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
592   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
593   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
594   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
595   PetscLogInfo(0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);
596   PetscFunctionReturn(0);
597 }
598 
599 #undef __FUNCT__
600 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
601 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
602 {
603   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
604   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
605   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
606   PetscErrorCode ierr;
607   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
608   PetscInt       *row,*col,other_disassembled;
609   PetscMPIInt    n;
610   PetscTruth     r1,r2,r3;
611   MatScalar      *val;
612   InsertMode     addv = mat->insertmode;
613 
614   PetscFunctionBegin;
615 
616   if (!baij->donotstash) {
617     while (1) {
618       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
619       if (!flg) break;
620 
621       for (i=0; i<n;) {
622         /* Now identify the consecutive vals belonging to the same row */
623         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
624         if (j < n) ncols = j-i;
625         else       ncols = n-i;
626         /* Now assemble all these values with a single function call */
627         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
628         i = j;
629       }
630     }
631     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
632     /* Now process the block-stash. Since the values are stashed column-oriented,
633        set the roworiented flag to column oriented, and after MatSetValues()
634        restore the original flags */
635     r1 = baij->roworiented;
636     r2 = a->roworiented;
637     r3 = b->roworiented;
638     baij->roworiented = PETSC_FALSE;
639     a->roworiented    = PETSC_FALSE;
640     b->roworiented    = PETSC_FALSE;
641     while (1) {
642       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
643       if (!flg) break;
644 
645       for (i=0; i<n;) {
646         /* Now identify the consecutive vals belonging to the same row */
647         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
648         if (j < n) ncols = j-i;
649         else       ncols = n-i;
650         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
651         i = j;
652       }
653     }
654     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
655     baij->roworiented = r1;
656     a->roworiented    = r2;
657     b->roworiented    = r3;
658   }
659 
660   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
661   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
662 
663   /* determine if any processor has disassembled; if so, we must
664      also disassemble ourselves, in order that we may reassemble. */
665   /*
666      if the nonzero structure of submatrix B cannot change then we know that
667      no processor disassembled, thus we can skip this stuff
668   */
669   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
670     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
671     if (mat->was_assembled && !other_disassembled) {
672       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
673     }
674   }
675 
676   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
677     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
678   }
679   b->compressedrow.use = PETSC_TRUE;
680   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
681   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
682 
683   if (baij->rowvalues) {
684     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
685     baij->rowvalues = 0;
686   }
687 
688   PetscFunctionReturn(0);
689 }
690 
691 #undef __FUNCT__
692 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
693 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
694 {
695   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
696   PetscErrorCode    ierr;
697   PetscInt          bs = mat->bs;
698   PetscMPIInt       size = baij->size,rank = baij->rank;
699   PetscTruth        iascii,isdraw;
700   PetscViewer       sviewer;
701   PetscViewerFormat format;
702 
703   PetscFunctionBegin;
704   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
705   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
706   if (iascii) {
707     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
708     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
709       MatInfo info;
710       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
711       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
712       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
713               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
714               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
715       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
716       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
717       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
718       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
719       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
720       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
721       PetscFunctionReturn(0);
722     } else if (format == PETSC_VIEWER_ASCII_INFO) {
723       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
724       PetscFunctionReturn(0);
725     }
726   }
727 
728   if (isdraw) {
729     PetscDraw       draw;
730     PetscTruth isnull;
731     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
732     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
733   }
734 
735   if (size == 1) {
736     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
737     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
738   } else {
739     /* assemble the entire matrix onto first processor. */
740     Mat          A;
741     Mat_SeqSBAIJ *Aloc;
742     Mat_SeqBAIJ  *Bloc;
743     PetscInt     M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
744     MatScalar    *a;
745 
746     /* Should this be the same type as mat? */
747     if (!rank) {
748       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
749     } else {
750       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
751     }
752     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
753     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
754     PetscLogObjectParent(mat,A);
755 
756     /* copy over the A part */
757     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
758     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
759     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
760 
761     for (i=0; i<mbs; i++) {
762       rvals[0] = bs*(baij->rstart + i);
763       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
764       for (j=ai[i]; j<ai[i+1]; j++) {
765         col = (baij->cstart+aj[j])*bs;
766         for (k=0; k<bs; k++) {
767           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
768           col++; a += bs;
769         }
770       }
771     }
772     /* copy over the B part */
773     Bloc = (Mat_SeqBAIJ*)baij->B->data;
774     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
775     for (i=0; i<mbs; i++) {
776       rvals[0] = bs*(baij->rstart + i);
777       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
778       for (j=ai[i]; j<ai[i+1]; j++) {
779         col = baij->garray[aj[j]]*bs;
780         for (k=0; k<bs; k++) {
781           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
782           col++; a += bs;
783         }
784       }
785     }
786     ierr = PetscFree(rvals);CHKERRQ(ierr);
787     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
788     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
789     /*
790        Everyone has to call to draw the matrix since the graphics waits are
791        synchronized across all processors that share the PetscDraw object
792     */
793     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
794     if (!rank) {
795       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
796       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
797     }
798     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
799     ierr = MatDestroy(A);CHKERRQ(ierr);
800   }
801   PetscFunctionReturn(0);
802 }
803 
804 #undef __FUNCT__
805 #define __FUNCT__ "MatView_MPISBAIJ"
806 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
807 {
808   PetscErrorCode ierr;
809   PetscTruth     iascii,isdraw,issocket,isbinary;
810 
811   PetscFunctionBegin;
812   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
813   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
814   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
815   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
816   if (iascii || isdraw || issocket || isbinary) {
817     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
818   } else {
819     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
820   }
821   PetscFunctionReturn(0);
822 }
823 
824 #undef __FUNCT__
825 #define __FUNCT__ "MatDestroy_MPISBAIJ"
826 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
827 {
828   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
829   PetscErrorCode ierr;
830 
831   PetscFunctionBegin;
832 #if defined(PETSC_USE_LOG)
833   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
834 #endif
835   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
836   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
837   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
838   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
839   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
840 #if defined (PETSC_USE_CTABLE)
841   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
842 #else
843   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
844 #endif
845   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
846   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
847   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
848   if (baij->slvec0) {
849     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
850     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
851   }
852   if (baij->slvec1) {
853     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
854     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
855     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
856   }
857   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
858   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
859   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
860   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
861 #if defined(PETSC_USE_MAT_SINGLE)
862   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
863 #endif
864   ierr = PetscFree(baij);CHKERRQ(ierr);
865 
866   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
867   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
868   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
869   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
870   PetscFunctionReturn(0);
871 }
872 
873 #undef __FUNCT__
874 #define __FUNCT__ "MatMult_MPISBAIJ"
875 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
876 {
877   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
878   PetscErrorCode ierr;
879   PetscInt       nt,mbs=a->mbs,bs=A->bs;
880   PetscScalar    *x,*from,zero=0.0;
881 
882   PetscFunctionBegin;
883   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
884   if (nt != A->n) {
885     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
886   }
887   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
888   if (nt != A->m) {
889     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
890   }
891 
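  /* Only the upper triangle is stored: A holds the (symmetric) diagonal block and
     B the strictly upper off-diagonal blocks, so
         y = A*x_loc + B*x_ghost + (contributions of B^T*x_loc owned by other processes).
     slvec0 packs [x_loc ; B^T*x_loc]; one forward scatter with ADD_VALUES then
     accumulates the B^T pieces into the owned part of slvec1 and gathers the ghost
     values of x into slvec1b, after which a single multadd with B completes y. */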
892   /* diagonal part */
893   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
894   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
895 
896   /* subdiagonal part */
897   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
898 
899   /* copy x into the vec slvec0 */
900   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
901   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
902   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
903   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
904 
905   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
906   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
907   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
908 
909   /* superdiagonal part */
910   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
911 
912   PetscFunctionReturn(0);
913 }
914 
915 #undef __FUNCT__
916 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
917 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
918 {
919   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
920   PetscErrorCode ierr;
921   PetscInt       nt;
922 
923   PetscFunctionBegin;
924   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
925   if (nt != A->n) {
926     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
927   }
928   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
929   if (nt != A->m) {
930     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
931   }
932 
933   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
934   /* do diagonal part */
935   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
936   /* do superdiagonal part */
937   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
938   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
939   /* do subdiagonal part */
940   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
941   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
942   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
943 
944   PetscFunctionReturn(0);
945 }
946 
947 #undef __FUNCT__
948 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
949 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
950 {
951   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
952   PetscErrorCode ierr;
953   PetscInt       mbs=a->mbs,bs=A->bs;
954   PetscScalar    *x,*from,zero=0.0;
955 
956   PetscFunctionBegin;
957   /*
958   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
959   PetscSynchronizedFlush(A->comm);
960   */
961   /* diagonal part */
962   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
963   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
964 
965   /* subdiagonal part */
966   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
967 
968   /* copy x into the vec slvec0 */
969   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
970   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
971   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
972   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
973 
974   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
975   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
976   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
977 
978   /* superdiagonal part */
979   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
980 
981   PetscFunctionReturn(0);
982 }
983 
984 #undef __FUNCT__
985 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
986 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
987 {
988   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
989   PetscErrorCode ierr;
990 
991   PetscFunctionBegin;
992   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
993   /* do diagonal part */
994   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
995   /* do superdiagonal part */
996   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
997   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
998 
999   /* do subdiagonal part */
1000   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1001   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1002   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1003 
1004   PetscFunctionReturn(0);
1005 }
1006 
1007 #undef __FUNCT__
1008 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1009 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1010 {
1011   PetscErrorCode ierr;
1012 
1013   PetscFunctionBegin;
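  /* the matrix is symmetric, so the transpose product is just MatMult() */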
1014   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1015   PetscFunctionReturn(0);
1016 }
1017 
1018 #undef __FUNCT__
1019 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1020 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1021 {
1022   PetscErrorCode ierr;
1023 
1024   PetscFunctionBegin;
1025   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1026   PetscFunctionReturn(0);
1027 }
1028 
1029 /*
1030   This only works correctly for square matrices where the subblock A->A is the
1031    diagonal block
1032 */
1033 #undef __FUNCT__
1034 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1035 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1036 {
1037   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1038   PetscErrorCode ierr;
1039 
1040   PetscFunctionBegin;
1041   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1042   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1043   PetscFunctionReturn(0);
1044 }
1045 
1046 #undef __FUNCT__
1047 #define __FUNCT__ "MatScale_MPISBAIJ"
1048 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1049 {
1050   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1051   PetscErrorCode ierr;
1052 
1053   PetscFunctionBegin;
1054   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1055   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1056   PetscFunctionReturn(0);
1057 }
1058 
1059 #undef __FUNCT__
1060 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1061 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1062 {
1063   PetscFunctionBegin;
1064   SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1070 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1071 {
1072   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1073 
1074   PetscFunctionBegin;
1075   if (!baij->getrowactive) {
1076     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1077   }
1078   baij->getrowactive = PETSC_FALSE;
1079   PetscFunctionReturn(0);
1080 }
1081 
1082 #undef __FUNCT__
1083 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1084 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1085 {
1086   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1087   PetscErrorCode ierr;
1088 
1089   PetscFunctionBegin;
1090   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1091   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1092   PetscFunctionReturn(0);
1093 }
1094 
1095 #undef __FUNCT__
1096 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1097 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1098 {
1099   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1100   Mat            A = a->A,B = a->B;
1101   PetscErrorCode ierr;
1102   PetscReal      isend[5],irecv[5];
1103 
1104   PetscFunctionBegin;
1105   info->block_size     = (PetscReal)matin->bs;
1106   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1107   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1108   isend[3] = info->memory;  isend[4] = info->mallocs;
1109   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1110   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1111   isend[3] += info->memory;  isend[4] += info->mallocs;
1112   if (flag == MAT_LOCAL) {
1113     info->nz_used      = isend[0];
1114     info->nz_allocated = isend[1];
1115     info->nz_unneeded  = isend[2];
1116     info->memory       = isend[3];
1117     info->mallocs      = isend[4];
1118   } else if (flag == MAT_GLOBAL_MAX) {
1119     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1120     info->nz_used      = irecv[0];
1121     info->nz_allocated = irecv[1];
1122     info->nz_unneeded  = irecv[2];
1123     info->memory       = irecv[3];
1124     info->mallocs      = irecv[4];
1125   } else if (flag == MAT_GLOBAL_SUM) {
1126     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1127     info->nz_used      = irecv[0];
1128     info->nz_allocated = irecv[1];
1129     info->nz_unneeded  = irecv[2];
1130     info->memory       = irecv[3];
1131     info->mallocs      = irecv[4];
1132   } else {
1133     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1134   }
1135   info->rows_global       = (PetscReal)matin->M;
1136   info->columns_global    = (PetscReal)matin->N;
1137   info->rows_local        = (PetscReal)matin->m;
1138   info->columns_local     = (PetscReal)matin->N;
1139   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1140   info->fill_ratio_needed = 0;
1141   info->factor_mallocs    = 0;
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1147 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1148 {
1149   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   switch (op) {
1154   case MAT_NO_NEW_NONZERO_LOCATIONS:
1155   case MAT_YES_NEW_NONZERO_LOCATIONS:
1156   case MAT_COLUMNS_UNSORTED:
1157   case MAT_COLUMNS_SORTED:
1158   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1159   case MAT_KEEP_ZEROED_ROWS:
1160   case MAT_NEW_NONZERO_LOCATION_ERR:
1161     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1162     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1163     break;
1164   case MAT_ROW_ORIENTED:
1165     a->roworiented = PETSC_TRUE;
1166     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1167     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1168     break;
1169   case MAT_ROWS_SORTED:
1170   case MAT_ROWS_UNSORTED:
1171   case MAT_YES_NEW_DIAGONALS:
1172     PetscLogInfo(A,"Info:MatSetOption_MPISBAIJ:Option ignored\n");
1173     break;
1174   case MAT_COLUMN_ORIENTED:
1175     a->roworiented = PETSC_FALSE;
1176     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1177     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1178     break;
1179   case MAT_IGNORE_OFF_PROC_ENTRIES:
1180     a->donotstash = PETSC_TRUE;
1181     break;
1182   case MAT_NO_NEW_DIAGONALS:
1183     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1184   case MAT_USE_HASH_TABLE:
1185     a->ht_flag = PETSC_TRUE;
1186     break;
1187   case MAT_NOT_SYMMETRIC:
1188   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1189   case MAT_HERMITIAN:
1190     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1191   case MAT_SYMMETRIC:
1192   case MAT_STRUCTURALLY_SYMMETRIC:
1193   case MAT_NOT_HERMITIAN:
1194   case MAT_SYMMETRY_ETERNAL:
1195   case MAT_NOT_SYMMETRY_ETERNAL:
1196     break;
1197   default:
1198     SETERRQ(PETSC_ERR_SUP,"unknown option");
1199   }
1200   PetscFunctionReturn(0);
1201 }
1202 
1203 #undef __FUNCT__
1204 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1205 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1206 {
1207   PetscErrorCode ierr;
1208   PetscFunctionBegin;
1209   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1210   PetscFunctionReturn(0);
1211 }
1212 
1213 #undef __FUNCT__
1214 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1215 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1216 {
1217   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1218   Mat            a=baij->A, b=baij->B;
1219   PetscErrorCode ierr;
1220   PetscInt       nv,m,n;
1221 
1222   PetscFunctionBegin;
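  /* only the symmetric scaling D*A*D is supported: a left scaling different from the
     right scaling would destroy the symmetry that the SBAIJ storage format assumes */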
1223   if (ll != rr) {
1224     SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same\n");
1225   }
1226   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1227   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);
1228   if (rr) {
1229     ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1230     if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1231 
1232     ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1233 
1234     /* left diagonalscale the off-diagonal part */
1235     ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1236 
1237     /* scale the diagonal part */
1238     ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1239 
1240     /* right diagonalscale the off-diagonal part */
1241     ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1242     ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1243   }
1244   PetscFunctionReturn(0);
1245 }
1246 
1247 #undef __FUNCT__
1248 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1249 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1250 {
1251   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1252   MPI_Comm          comm = A->comm;
1253   static PetscTruth called = PETSC_FALSE;
1254   PetscErrorCode    ierr;
1255 
1256   PetscFunctionBegin;
1257   if (!a->rank) {
1258     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1259   }
1260   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1261   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1262   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1263   PetscFunctionReturn(0);
1264 }
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1268 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1269 {
1270   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1271   PetscErrorCode ierr;
1272 
1273   PetscFunctionBegin;
1274   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1275   PetscFunctionReturn(0);
1276 }
1277 
1278 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1279 
1280 #undef __FUNCT__
1281 #define __FUNCT__ "MatEqual_MPISBAIJ"
1282 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1283 {
1284   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1285   Mat            a,b,c,d;
1286   PetscTruth     flg;
1287   PetscErrorCode ierr;
1288 
1289   PetscFunctionBegin;
1290   a = matA->A; b = matA->B;
1291   c = matB->A; d = matB->B;
1292 
1293   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1294   if (flg) {
1295     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1296   }
1297   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1303 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1304 {
1305   PetscErrorCode ierr;
1306 
1307   PetscFunctionBegin;
1308   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1314 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1315 {
1316   PetscErrorCode ierr;
1317   PetscInt       i;
1318   PetscTruth     flg;
1319 
1320   PetscFunctionBegin;
1321   for (i=0; i<n; i++) {
1322     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1323     if (!flg) {
1324       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1325     }
1326   }
1327   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1328   PetscFunctionReturn(0);
1329 }
1330 
1331 
1332 /* -------------------------------------------------------------------*/
1333 static struct _MatOps MatOps_Values = {
1334        MatSetValues_MPISBAIJ,
1335        MatGetRow_MPISBAIJ,
1336        MatRestoreRow_MPISBAIJ,
1337        MatMult_MPISBAIJ,
1338 /* 4*/ MatMultAdd_MPISBAIJ,
1339        MatMultTranspose_MPISBAIJ,
1340        MatMultTransposeAdd_MPISBAIJ,
1341        0,
1342        0,
1343        0,
1344 /*10*/ 0,
1345        0,
1346        0,
1347        MatRelax_MPISBAIJ,
1348        MatTranspose_MPISBAIJ,
1349 /*15*/ MatGetInfo_MPISBAIJ,
1350        MatEqual_MPISBAIJ,
1351        MatGetDiagonal_MPISBAIJ,
1352        MatDiagonalScale_MPISBAIJ,
1353        MatNorm_MPISBAIJ,
1354 /*20*/ MatAssemblyBegin_MPISBAIJ,
1355        MatAssemblyEnd_MPISBAIJ,
1356        0,
1357        MatSetOption_MPISBAIJ,
1358        MatZeroEntries_MPISBAIJ,
1359 /*25*/ 0,
1360        0,
1361        0,
1362        0,
1363        0,
1364 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1365        0,
1366        0,
1367        0,
1368        0,
1369 /*35*/ MatDuplicate_MPISBAIJ,
1370        0,
1371        0,
1372        0,
1373        0,
1374 /*40*/ 0,
1375        MatGetSubMatrices_MPISBAIJ,
1376        MatIncreaseOverlap_MPISBAIJ,
1377        MatGetValues_MPISBAIJ,
1378        0,
1379 /*45*/ MatPrintHelp_MPISBAIJ,
1380        MatScale_MPISBAIJ,
1381        0,
1382        0,
1383        0,
1384 /*50*/ 0,
1385        0,
1386        0,
1387        0,
1388        0,
1389 /*55*/ 0,
1390        0,
1391        MatSetUnfactored_MPISBAIJ,
1392        0,
1393        MatSetValuesBlocked_MPISBAIJ,
1394 /*60*/ 0,
1395        0,
1396        0,
1397        MatGetPetscMaps_Petsc,
1398        0,
1399 /*65*/ 0,
1400        0,
1401        0,
1402        0,
1403        0,
1404 /*70*/ MatGetRowMax_MPISBAIJ,
1405        0,
1406        0,
1407        0,
1408        0,
1409 /*75*/ 0,
1410        0,
1411        0,
1412        0,
1413        0,
1414 /*80*/ 0,
1415        0,
1416        0,
1417        0,
1418        MatLoad_MPISBAIJ,
1419 /*85*/ 0,
1420        0,
1421        0,
1422        0,
1423        0,
1424 /*90*/ 0,
1425        0,
1426        0,
1427        0,
1428        0,
1429 /*95*/ 0,
1430        0,
1431        0,
1432        0};
1433 
1434 
1435 EXTERN_C_BEGIN
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1438 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1439 {
1440   PetscFunctionBegin;
1441   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1442   *iscopy = PETSC_FALSE;
1443   PetscFunctionReturn(0);
1444 }
1445 EXTERN_C_END
1446 
1447 EXTERN_C_BEGIN
1448 #undef __FUNCT__
1449 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1450 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1451 {
1452   Mat_MPISBAIJ   *b;
1453   PetscErrorCode ierr;
1454   PetscInt       i,mbs,Mbs;
1455 
1456   PetscFunctionBegin;
1457   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1458 
1459   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1460   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1461   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1462   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1463   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1464   if (d_nnz) {
1465     for (i=0; i<B->m/bs; i++) {
1466       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1467     }
1468   }
1469   if (o_nnz) {
1470     for (i=0; i<B->m/bs; i++) {
1471       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1472     }
1473   }
1474   B->preallocated = PETSC_TRUE;
1475   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1476   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1477   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1478   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1479 
1480   b   = (Mat_MPISBAIJ*)B->data;
1481   mbs = B->m/bs;
1482   Mbs = B->M/bs;
1483   if (mbs*bs != B->m) {
1484     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1485   }
1486 
1487   B->bs  = bs;
1488   b->bs2 = bs*bs;
1489   b->mbs = mbs;
1490   b->nbs = mbs;
1491   b->Mbs = Mbs;
1492   b->Nbs = Mbs;
1493 
1494   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1495   b->rowners[0]    = 0;
1496   for (i=2; i<=b->size; i++) {
1497     b->rowners[i] += b->rowners[i-1];
1498   }
1499   b->rstart    = b->rowners[b->rank];
1500   b->rend      = b->rowners[b->rank+1];
1501   b->cstart    = b->rstart;
1502   b->cend      = b->rend;
1503   for (i=0; i<=b->size; i++) {
1504     b->rowners_bs[i] = b->rowners[i]*bs;
1505   }
1506   b->rstart_bs = b->rstart*bs;
1507   b->rend_bs   = b->rend*bs;
1508 
1509   b->cstart_bs = b->cstart*bs;
1510   b->cend_bs   = b->cend*bs;
1511 
1512   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1513   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1514   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1515   PetscLogObjectParent(B,b->A);
1516 
1517   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1518   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1519   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1520   PetscLogObjectParent(B,b->B);
1521 
1522   /* build cache for off-processor entries formed during assembly */
1523   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1524 
1525   PetscFunctionReturn(0);
1526 }
1527 EXTERN_C_END
1528 
1529 /*MC
1530    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1531    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1532 
1533    Options Database Keys:
1534 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1535 
1536   Level: beginner
1537 
1538 .seealso: MatCreateMPISBAIJ
1539 M*/
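/*
   Typical construction of a MATMPISBAIJ matrix through the options database
   (a hedged sketch; sizes are illustrative, and MatSetFromOptions() picks up
   the -mat_type mpisbaij key documented above):

      Mat A;
      ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,M,M,&A);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,1,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
*/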
1540 
1541 EXTERN_C_BEGIN
1542 #undef __FUNCT__
1543 #define __FUNCT__ "MatCreate_MPISBAIJ"
1544 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1545 {
1546   Mat_MPISBAIJ   *b;
1547   PetscErrorCode ierr;
1548   PetscTruth     flg;
1549 
1550   PetscFunctionBegin;
1551 
1552   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1553   B->data = (void*)b;
1554   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1555 
1556   B->ops->destroy    = MatDestroy_MPISBAIJ;
1557   B->ops->view       = MatView_MPISBAIJ;
1558   B->mapping    = 0;
1559   B->factor     = 0;
1560   B->assembled  = PETSC_FALSE;
1561 
1562   B->insertmode = NOT_SET_VALUES;
1563   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1564   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1565 
1566   /* build local table of row and column ownerships */
1567   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1568   b->cowners    = b->rowners + b->size + 2;
1569   b->rowners_bs = b->cowners + b->size + 2;
1570   PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));
1571 
1572   /* build cache for off-processor entries formed during assembly */
1573   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1574   b->donotstash  = PETSC_FALSE;
1575   b->colmap      = PETSC_NULL;
1576   b->garray      = PETSC_NULL;
1577   b->roworiented = PETSC_TRUE;
1578 
1579 #if defined(PETSC_USE_MAT_SINGLE)
1580   /* stuff for MatSetValues_XXX in single precision */
1581   b->setvalueslen     = 0;
1582   b->setvaluescopy    = PETSC_NULL;
1583 #endif
1584 
1585   /* stuff used in block assembly */
1586   b->barray       = 0;
1587 
1588   /* stuff used for matrix vector multiply */
1589   b->lvec         = 0;
1590   b->Mvctx        = 0;
1591   b->slvec0       = 0;
1592   b->slvec0b      = 0;
1593   b->slvec1       = 0;
1594   b->slvec1a      = 0;
1595   b->slvec1b      = 0;
1596   b->sMvctx       = 0;
1597 
1598   /* stuff for MatGetRow() */
1599   b->rowindices   = 0;
1600   b->rowvalues    = 0;
1601   b->getrowactive = PETSC_FALSE;
1602 
1603   /* hash table stuff */
1604   b->ht           = 0;
1605   b->hd           = 0;
1606   b->ht_size      = 0;
1607   b->ht_flag      = PETSC_FALSE;
1608   b->ht_fact      = 0;
1609   b->ht_total_ct  = 0;
1610   b->ht_insert_ct = 0;
1611 
1612   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1613   if (flg) {
1614     PetscReal fact = 1.39;
1615     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1616     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1617     if (fact <= 1.0) fact = 1.39;
1618     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1619     PetscLogInfo(0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact);
1620   }
1621   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1622                                      "MatStoreValues_MPISBAIJ",
1623                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1624   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1625                                      "MatRetrieveValues_MPISBAIJ",
1626                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1627   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1628                                      "MatGetDiagonalBlock_MPISBAIJ",
1629                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1630   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1631                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1632                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1633   B->symmetric                  = PETSC_TRUE;
1634   B->structurally_symmetric     = PETSC_TRUE;
1635   B->symmetric_set              = PETSC_TRUE;
1636   B->structurally_symmetric_set = PETSC_TRUE;
1637   PetscFunctionReturn(0);
1638 }
1639 EXTERN_C_END
1640 
1641 /*MC
1642    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1643 
1644    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1645    and MATMPISBAIJ otherwise.
1646 
1647    Options Database Keys:
1648 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1649 
1650   Level: beginner
1651 
1652 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1653 M*/
1654 
1655 EXTERN_C_BEGIN
1656 #undef __FUNCT__
1657 #define __FUNCT__ "MatCreate_SBAIJ"
1658 PetscErrorCode MatCreate_SBAIJ(Mat A)
1659 {
1660   PetscErrorCode ierr;
1661   PetscMPIInt    size;
1662 
1663   PetscFunctionBegin;
1664   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1665   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1666   if (size == 1) {
1667     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1668   } else {
1669     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1670   }
1671   PetscFunctionReturn(0);
1672 }
1673 EXTERN_C_END
1674 
1675 #undef __FUNCT__
1676 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1677 /*@C
1678    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1679    the user should preallocate the matrix storage by setting the parameters
1680    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1681    performance can be increased by more than a factor of 50.
1682 
1683    Collective on Mat
1684 
1685    Input Parameters:
1686 +  A - the matrix
1687 .  bs   - size of block
1688 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1689            submatrix  (same for all local rows)
1690 .  d_nnz - array containing the number of block nonzeros in the various block rows
1691            in the upper triangular and diagonal part of the diagonal portion of the local
1692            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1693            for the diagonal entry even if it is zero.
1694 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1695            submatrix (same for all local rows).
1696 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1697            off-diagonal portion of the local submatrix (possibly different for
1698            each block row) or PETSC_NULL.
1699 
1700 
1701    Options Database Keys:
1702 +   -mat_no_unroll - uses code that does not unroll the loops in the
1703                      block calculations (much slower)
1704 -   -mat_block_size - size of the blocks to use
1705 
1706    Notes:
1707 
1708    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1709    then it must be used on all processors that share the object for that argument.
1710 
1711    If the *_nnz parameter is given then the *_nz parameter is ignored
1712 
1713    Storage Information:
1714    For a square global matrix we define each processor's diagonal portion
1715    to be its local rows and the corresponding columns (a square submatrix);
1716    each processor's off-diagonal portion encompasses the remainder of the
1717    local matrix (a rectangular submatrix).
1718 
1719    The user can specify preallocated storage for the diagonal part of
1720    the local submatrix with either d_nz or d_nnz (not both).  Set
1721    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1722    memory allocation.  Likewise, specify preallocated storage for the
1723    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1724 
1725    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1726    the figure below we depict these three local rows and all columns (0-11).
1727 
1728 .vb
1729            0 1 2 3 4 5 6 7 8 9 10 11
1730           -------------------
1731    row 3  |  o o o d d d o o o o o o
1732    row 4  |  o o o d d d o o o o o o
1733    row 5  |  o o o d d d o o o o o o
1734           -------------------
1735 .ve
1736 
1737    Thus, any entries in the d locations are stored in the d (diagonal)
1738    submatrix, and any entries in the o locations are stored in the
1739    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1740    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1741 
1742    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1743    plus the diagonal part of the d matrix,
1744    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1745    In general, for PDE problems in which most nonzeros are near the diagonal,
1746    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1747    or you will get TERRIBLE performance; see the users' manual chapter on
1748    matrices.
1749 
1750    Level: intermediate
1751 
1752 .keywords: matrix, block, aij, compressed row, sparse, parallel
1753 
1754 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1755 @*/
1756 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1757 {
1758   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1759 
1760   PetscFunctionBegin;
1761   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1762   if (f) {
1763     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1764   }
1765   PetscFunctionReturn(0);
1766 }
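/*
   Sketch of a preallocation call that uses per-block-row counts; the arrays below
   are invented purely for illustration (one entry per local block row, counting
   only upper-triangular plus diagonal blocks in d_nnz), real codes derive them
   from the mesh or graph:

      PetscInt d_nnz[3] = {2,1,1};
      PetscInt o_nnz[3] = {1,1,0};
      ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/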
1767 
1768 #undef __FUNCT__
1769 #define __FUNCT__ "MatCreateMPISBAIJ"
1770 /*@C
1771    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1772    (block compressed row).  For good matrix assembly performance
1773    the user should preallocate the matrix storage by setting the parameters
1774    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1775    performance can be increased by more than a factor of 50.
1776 
1777    Collective on MPI_Comm
1778 
1779    Input Parameters:
1780 +  comm - MPI communicator
1781 .  bs   - size of block
1782 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1783            This value should be the same as the local size used in creating the
1784            y vector for the matrix-vector product y = Ax.
1785 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1786            This value should be the same as the local size used in creating the
1787            x vector for the matrix-vector product y = Ax.
1788 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1789 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1790 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1791            submatrix  (same for all local rows)
1792 .  d_nnz - array containing the number of block nonzeros in the various block rows
1793            in the upper triangular and diagonal part of the diagonal portion of the local
1794            submatrix (possibly different for each block row) or PETSC_NULL.
1795            You must leave room for the diagonal entry even if it is zero.
1796 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1797            submatrix (same for all local rows).
1798 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1799            off-diagonal portion of the local submatrix (possibly different for
1800            each block row) or PETSC_NULL.
1801 
1802    Output Parameter:
1803 .  A - the matrix
1804 
1805    Options Database Keys:
1806 +   -mat_no_unroll - uses code that does not unroll the loops in the
1807                      block calculations (much slower)
1808 .   -mat_block_size - size of the blocks to use
1809 -   -mat_mpi - use the parallel matrix data structures even on one processor
1810                (defaults to using SeqSBAIJ format on one processor)
1811 
1812    Notes:
1813    The user MUST specify either the local or global matrix dimensions
1814    (possibly both).
1815 
1816    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1817    then it must be used on all processors that share the object for that argument.
1818 
1819    If the *_nnz parameter is given then the *_nz parameter is ignored
1820 
1821    Storage Information:
1822    For a square global matrix we define each processor's diagonal portion
1823    to be its local rows and the corresponding columns (a square submatrix);
1824    each processor's off-diagonal portion encompasses the remainder of the
1825    local matrix (a rectangular submatrix).
1826 
1827    The user can specify preallocated storage for the diagonal part of
1828    the local submatrix with either d_nz or d_nnz (not both).  Set
1829    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1830    memory allocation.  Likewise, specify preallocated storage for the
1831    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1832 
1833    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1834    the figure below we depict these three local rows and all columns (0-11).
1835 
1836 .vb
1837            0 1 2 3 4 5 6 7 8 9 10 11
1838           -------------------
1839    row 3  |  o o o d d d o o o o o o
1840    row 4  |  o o o d d d o o o o o o
1841    row 5  |  o o o d d d o o o o o o
1842           -------------------
1843 .ve
1844 
1845    Thus, any entries in the d locations are stored in the d (diagonal)
1846    submatrix, and any entries in the o locations are stored in the
1847    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1848    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1849 
1850    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1851    plus the diagonal part of the d matrix,
1852    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1853    In general, for PDE problems in which most nonzeros are near the diagonal,
1854    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1855    or you will get TERRIBLE performance; see the users' manual chapter on
1856    matrices.
1857 
1858    Level: intermediate
1859 
1860 .keywords: matrix, block, aij, compressed row, sparse, parallel
1861 
1862 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1863 @*/
1864 
1865 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1866 {
1867   PetscErrorCode ierr;
1868   PetscMPIInt    size;
1869 
1870   PetscFunctionBegin;
1871   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1872   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1873   if (size > 1) {
1874     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1875     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1876   } else {
1877     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1878     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1879   }
1880   PetscFunctionReturn(0);
1881 }
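/*
   End-to-end sketch of creating and assembling an MPISBAIJ matrix: only entries
   in the upper triangle (column >= row) need to be set.  The sizes, block size
   and values below are illustrative only:

      Mat         A;
      PetscInt    i,Istart,Iend;
      PetscScalar v = 1.0;
      ierr = MatCreateMPISBAIJ(comm,1,PETSC_DECIDE,PETSC_DECIDE,M,M,3,PETSC_NULL,1,PETSC_NULL,&A);CHKERRQ(ierr);
      ierr = MatGetOwnershipRange(A,&Istart,&Iend);CHKERRQ(ierr);
      for (i=Istart; i<Iend; i++) {
        ierr = MatSetValues(A,1,&i,1,&i,&v,INSERT_VALUES);CHKERRQ(ierr);
      }
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/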
1882 
1883 
1884 #undef __FUNCT__
1885 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1886 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1887 {
1888   Mat            mat;
1889   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1890   PetscErrorCode ierr;
1891   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1892   PetscScalar    *array;
1893 
1894   PetscFunctionBegin;
1895   *newmat       = 0;
1896   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1897   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1898   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1899 
1900   mat->factor       = matin->factor;
1901   mat->preallocated = PETSC_TRUE;
1902   mat->assembled    = PETSC_TRUE;
1903   mat->insertmode   = NOT_SET_VALUES;
1904 
1905   a = (Mat_MPISBAIJ*)mat->data;
1906   mat->bs  = matin->bs;
1907   a->bs2   = oldmat->bs2;
1908   a->mbs   = oldmat->mbs;
1909   a->nbs   = oldmat->nbs;
1910   a->Mbs   = oldmat->Mbs;
1911   a->Nbs   = oldmat->Nbs;
1912 
1913   a->rstart       = oldmat->rstart;
1914   a->rend         = oldmat->rend;
1915   a->cstart       = oldmat->cstart;
1916   a->cend         = oldmat->cend;
1917   a->size         = oldmat->size;
1918   a->rank         = oldmat->rank;
1919   a->donotstash   = oldmat->donotstash;
1920   a->roworiented  = oldmat->roworiented;
1921   a->rowindices   = 0;
1922   a->rowvalues    = 0;
1923   a->getrowactive = PETSC_FALSE;
1924   a->barray       = 0;
1925   a->rstart_bs    = oldmat->rstart_bs;
1926   a->rend_bs      = oldmat->rend_bs;
1927   a->cstart_bs    = oldmat->cstart_bs;
1928   a->cend_bs      = oldmat->cend_bs;
1929 
1930   /* hash table stuff */
1931   a->ht           = 0;
1932   a->hd           = 0;
1933   a->ht_size      = 0;
1934   a->ht_flag      = oldmat->ht_flag;
1935   a->ht_fact      = oldmat->ht_fact;
1936   a->ht_total_ct  = 0;
1937   a->ht_insert_ct = 0;
1938 
1939   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1940   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1941   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1942   if (oldmat->colmap) {
1943 #if defined (PETSC_USE_CTABLE)
1944     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1945 #else
1946     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1947     PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));
1948     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1949 #endif
1950   } else a->colmap = 0;
1951 
1952   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1953     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1954     PetscLogObjectMemory(mat,len*sizeof(PetscInt));
1955     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1956   } else a->garray = 0;
1957 
1958   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1959   PetscLogObjectParent(mat,a->lvec);
1960   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1961   PetscLogObjectParent(mat,a->Mvctx);
1962 
1963   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1964   PetscLogObjectParent(mat,a->slvec0);
1965   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1966   PetscLogObjectParent(mat,a->slvec1);
1967 
1968   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1969   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1970   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1971   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1972   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1973   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1974   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1975   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1976   PetscLogObjectParent(mat,a->slvec0);
1977   PetscLogObjectParent(mat,a->slvec1);
1978   PetscLogObjectParent(mat,a->slvec0b);
1979   PetscLogObjectParent(mat,a->slvec1a);
1980   PetscLogObjectParent(mat,a->slvec1b);
1981 
1982   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
1983   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1984   a->sMvctx = oldmat->sMvctx;
1985   PetscLogObjectParent(mat,a->sMvctx);
1986 
1987   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1988   PetscLogObjectParent(mat,a->A);
1989   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1990   PetscLogObjectParent(mat,a->B);
1991   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1992   *newmat = mat;
1993   PetscFunctionReturn(0);
1994 }
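/*
   Usage sketch for duplicating an assembled MPISBAIJ matrix, either copying or
   dropping the numerical values (A is assumed to be assembled):

      Mat B;
      ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr);
      ierr = MatDestroy(B);CHKERRQ(ierr);
*/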
1995 
1996 #include "petscsys.h"
1997 
1998 #undef __FUNCT__
1999 #define __FUNCT__ "MatLoad_MPISBAIJ"
2000 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2001 {
2002   Mat            A;
2003   PetscErrorCode ierr;
2004   PetscInt       i,nz,j,rstart,rend;
2005   PetscScalar    *vals,*buf;
2006   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2007   MPI_Status     status;
2008   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2009   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2010   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2011   PetscInt       bs=1,Mbs,mbs,extra_rows;
2012   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2013   PetscInt       dcount,kmax,k,nzcount,tmp;
2014   int            fd;
2015 
2016   PetscFunctionBegin;
2017   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2018 
2019   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2020   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2021   if (!rank) {
2022     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2023     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2024     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2025     if (header[3] < 0) {
2026       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2027     }
2028   }
2029 
2030   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2031   M = header[1]; N = header[2];
2032 
2033   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2034 
2035   /*
2036      This code adds extra rows to make sure the number of rows is
2037      divisible by the blocksize
2038   */
2039   Mbs        = M/bs;
2040   extra_rows = bs - M + bs*(Mbs);
2041   if (extra_rows == bs) extra_rows = 0;
2042   else                  Mbs++;
2043   if (extra_rows && !rank) {
2044     PetscLogInfo(0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n");
2045   }
2046 
2047   /* determine ownership of all rows */
2048   mbs        = Mbs/size + ((Mbs % size) > rank);
2049   m          = mbs*bs;
2050   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2051   browners   = rowners + size + 1;
2052   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2053   rowners[0] = 0;
2054   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2055   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2056   rstart = rowners[rank];
2057   rend   = rowners[rank+1];
2058 
2059   /* distribute row lengths to all processors */
2060   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2061   if (!rank) {
2062     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2063     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2064     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2065     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2066     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2067     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2068     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2069   } else {
2070     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2071   }
2072 
2073   if (!rank) {   /* procs[0] */
2074     /* calculate the number of nonzeros on each processor */
2075     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2076     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2077     for (i=0; i<size; i++) {
2078       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2079         procsnz[i] += rowlengths[j];
2080       }
2081     }
2082     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2083 
2084     /* determine max buffer needed and allocate it */
2085     maxnz = 0;
2086     for (i=0; i<size; i++) {
2087       maxnz = PetscMax(maxnz,procsnz[i]);
2088     }
2089     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2090 
2091     /* read in my part of the matrix column indices  */
2092     nz     = procsnz[0];
2093     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2094     mycols = ibuf;
2095     if (size == 1)  nz -= extra_rows;
2096     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2097     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2098 
2099     /* read in everyone else's (except the last proc's) and ship them off */
2100     for (i=1; i<size-1; i++) {
2101       nz   = procsnz[i];
2102       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2103       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2104     }
2105     /* read in the stuff for the last proc */
2106     if (size != 1) {
2107       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2108       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2109       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2110       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2111     }
2112     ierr = PetscFree(cols);CHKERRQ(ierr);
2113   } else {  /* procs[i], i>0 */
2114     /* determine buffer space needed for message */
2115     nz = 0;
2116     for (i=0; i<m; i++) {
2117       nz += locrowlens[i];
2118     }
2119     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2120     mycols = ibuf;
2121     /* receive message of column indices*/
2122     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2123     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2124     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2125   }
2126 
2127   /* loop over local block rows, determining the number of diagonal and off-diagonal block entries */
2128   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2129   odlens   = dlens + (rend-rstart);
2130   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2131   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2132   masked1  = mask    + Mbs;
2133   masked2  = masked1 + Mbs;
2134   rowcount = 0; nzcount = 0;
2135   for (i=0; i<mbs; i++) {
2136     dcount  = 0;
2137     odcount = 0;
2138     for (j=0; j<bs; j++) {
2139       kmax = locrowlens[rowcount];
2140       for (k=0; k<kmax; k++) {
2141         tmp = mycols[nzcount++]/bs; /* block col. index */
2142         if (!mask[tmp]) {
2143           mask[tmp] = 1;
2144           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2145           else masked1[dcount++] = tmp; /* entry in diag portion */
2146         }
2147       }
2148       rowcount++;
2149     }
2150 
2151     dlens[i]  = dcount;  /* d_nzz[i] */
2152     odlens[i] = odcount; /* o_nzz[i] */
2153 
2154     /* zero out the mask elements we set */
2155     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2156     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2157   }
2158 
2159   /* create our matrix */
2160   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2161   ierr = MatSetType(A,type);CHKERRQ(ierr);
2162   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2163   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2164 
2165   if (!rank) {
2166     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2167     /* read in my part of the matrix numerical values  */
2168     nz = procsnz[0];
2169     vals = buf;
2170     mycols = ibuf;
2171     if (size == 1)  nz -= extra_rows;
2172     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2173     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2174 
2175     /* insert into matrix */
2176     jj      = rstart*bs;
2177     for (i=0; i<m; i++) {
2178       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2179       mycols += locrowlens[i];
2180       vals   += locrowlens[i];
2181       jj++;
2182     }
2183 
2184     /* read in other processors (except the last one) and ship out */
2185     for (i=1; i<size-1; i++) {
2186       nz   = procsnz[i];
2187       vals = buf;
2188       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2189       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2190     }
2191     /* the last proc */
2192     if (size != 1){
2193       nz   = procsnz[i] - extra_rows;
2194       vals = buf;
2195       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2196       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2197       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2198     }
2199     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2200 
2201   } else {
2202     /* receive numeric values */
2203     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2204 
2205     /* receive message of values*/
2206     vals   = buf;
2207     mycols = ibuf;
2208     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2209     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2210     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2211 
2212     /* insert into matrix */
2213     jj      = rstart*bs;
2214     for (i=0; i<m; i++) {
2215       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2216       mycols += locrowlens[i];
2217       vals   += locrowlens[i];
2218       jj++;
2219     }
2220   }
2221 
2222   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2223   ierr = PetscFree(buf);CHKERRQ(ierr);
2224   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2225   ierr = PetscFree(rowners);CHKERRQ(ierr);
2226   ierr = PetscFree(dlens);CHKERRQ(ierr);
2227   ierr = PetscFree(mask);CHKERRQ(ierr);
2228   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2229   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2230   *newmat = A;
2231   PetscFunctionReturn(0);
2232 }
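/*
   Sketch of loading a matrix stored in PETSc binary format as MPISBAIJ.  The file
   name is illustrative, and the file-mode constant name varies across PETSc
   versions (PETSC_FILE_RDONLY is assumed here); the block size is taken from the
   -matload_block_size option read above:

      Mat         A;
      PetscViewer viewer;
      ierr = PetscViewerBinaryOpen(comm,"matrix.dat",PETSC_FILE_RDONLY,&viewer);CHKERRQ(ierr);
      ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
      ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
*/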
2233 
2234 #undef __FUNCT__
2235 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2236 /*XXXXX@
2237    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2238 
2239    Input Parameters:
2240 .  mat  - the matrix
2241 .  fact - factor
2242 
2243    Collective on Mat
2244 
2245    Level: advanced
2246 
2247   Notes:
2248    This can also be set by the command line option: -mat_use_hash_table fact
2249 
2250 .keywords: matrix, hashtable, factor, HT
2251 
2252 .seealso: MatSetOption()
2253 @XXXXX*/
2254 
2255 
2256 #undef __FUNCT__
2257 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2258 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2259 {
2260   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2261   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2262   PetscReal      atmp;
2263   PetscReal      *work,*svalues,*rvalues;
2264   PetscErrorCode ierr;
2265   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2266   PetscMPIInt    rank,size;
2267   PetscInt       *rowners_bs,dest,count,source;
2268   PetscScalar    *va;
2269   MatScalar      *ba;
2270   MPI_Status     stat;
2271 
2272   PetscFunctionBegin;
2273   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2274   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2275 
2276   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2277   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2278 
2279   bs   = A->bs;
2280   mbs  = a->mbs;
2281   Mbs  = a->Mbs;
2282   ba   = b->a;
2283   bi   = b->i;
2284   bj   = b->j;
2285 
2286   /* find ownerships */
2287   rowners_bs = a->rowners_bs;
2288 
2289   /* each proc creates an array to be distributed */
2290   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2291   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2292 
2293   /* row_max for B */
2294   if (rank != size-1){
2295     for (i=0; i<mbs; i++) {
2296       ncols = bi[1] - bi[0]; bi++;
2297       brow  = bs*i;
2298       for (j=0; j<ncols; j++){
2299         bcol = bs*(*bj);
2300         for (kcol=0; kcol<bs; kcol++){
2301           col = bcol + kcol;                 /* local col index */
2302           col += rowners_bs[rank+1];      /* global col index */
2303           for (krow=0; krow<bs; krow++){
2304             atmp = PetscAbsScalar(*ba); ba++;
2305             row = brow + krow;    /* local row index */
2306             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2307             if (work[col] < atmp) work[col] = atmp;
2308           }
2309         }
2310         bj++;
2311       }
2312     }
2313 
2314     /* send values to its owners */
2315     for (dest=rank+1; dest<size; dest++){
2316       svalues = work + rowners_bs[dest];
2317       count   = rowners_bs[dest+1]-rowners_bs[dest];
2318       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2319     }
2320   }
2321 
2322   /* receive values */
2323   if (rank){
2324     rvalues = work;
2325     count   = rowners_bs[rank+1]-rowners_bs[rank];
2326     for (source=0; source<rank; source++){
2327       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2328       /* process values */
2329       for (i=0; i<count; i++){
2330         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2331       }
2332     }
2333   }
2334 
2335   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2336   ierr = PetscFree(work);CHKERRQ(ierr);
2337   PetscFunctionReturn(0);
2338 }
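/*
   Usage sketch for MatGetRowMax() on an MPISBAIJ matrix: the result vector must
   share the matrix row layout (m and M below stand for the local and global row
   counts and are only illustrative):

      Vec v;
      ierr = VecCreateMPI(comm,m,M,&v);CHKERRQ(ierr);
      ierr = MatGetRowMax(A,v);CHKERRQ(ierr);
      ierr = VecDestroy(v);CHKERRQ(ierr);
*/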
2339 
2340 #undef __FUNCT__
2341 #define __FUNCT__ "MatRelax_MPISBAIJ"
2342 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2343 {
2344   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2345   PetscErrorCode ierr;
2346   PetscInt       mbs=mat->mbs,bs=matin->bs;
2347   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2348   Vec            bb1;
2349 
2350   PetscFunctionBegin;
2351   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2352   if (bs > 1)
2353     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2354 
2355   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2356     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2357       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2358       its--;
2359     }
2360 
2361     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2362     while (its--){
2363 
2364       /* lower triangular part: slvec0b = - B^T*xx */
2365       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2366 
2367       /* copy xx into slvec0a */
2368       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2369       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2370       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2371       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2372 
2373       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2374 
2375       /* copy bb into slvec1a */
2376       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2377       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2378       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2379       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2380 
2381       /* set slvec1b = 0 */
2382       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2383 
2384       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2385       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2386       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2387       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2388 
2389       /* upper triangular part: bb1 = bb1 - B*x */
2390       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2391 
2392       /* local diagonal sweep */
2393       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2394     }
2395     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2396   } else {
2397     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2398   }
2399   PetscFunctionReturn(0);
2400 }
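/*
   This kernel is normally reached through the SOR preconditioner; only the local
   symmetric sweep is supported for SBAIJ.  A hedged sketch of selecting it on an
   existing PC object:

      ierr = PCSetType(pc,PCSOR);CHKERRQ(ierr);
      ierr = PCSORSetSymmetric(pc,SOR_LOCAL_SYMMETRIC_SWEEP);CHKERRQ(ierr);
*/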
2401 
2402 #undef __FUNCT__
2403 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2404 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2405 {
2406   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2407   PetscErrorCode ierr;
2408   PetscScalar    mone=-1.0;
2409   Vec            lvec1,bb1;
2410 
2411   PetscFunctionBegin;
2412   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2413   if (matin->bs > 1)
2414     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2415 
2416   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2417     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2418       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2419       its--;
2420     }
2421 
2422     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2423     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2424     while (its--){
2425       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2426 
2427       /* lower diagonal part: bb1 = bb - B^T*xx */
2428       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2429       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2430 
2431       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2432       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2433       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2434 
2435       /* upper diagonal part: bb1 = bb1 - B*x */
2436       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2437       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2438 
2439       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2440 
2441       /* diagonal sweep */
2442       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2443     }
2444     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2445     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2446   } else {
2447     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2448   }
2449   PetscFunctionReturn(0);
2450 }
2451 
2452