1 
2 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
3 #include "mpisbaij.h"
4 #include "src/mat/impls/sbaij/seq/sbaij.h"
5 
6 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
8 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
9 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
10 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
11 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
13 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
16 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
18 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
19 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
20 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
21 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
22 
23 /*  UGLY, ugly, ugly
24    When MatScalar == PetscScalar the functions ..._MatScalar() do not exist; the macros
25    below simply alias them to the regular PetscScalar routines. Otherwise the ..._MatScalar()
26    routines take matrix elements in single precision and insert them into the single
27    precision data structures, while MatSetValuesBlocked_MPISBAIJ() first converts the
28    entries to single precision and then calls ..._MatScalar().
29 */
30 #if defined(PETSC_USE_MAT_SINGLE)
31 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
32 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 #else
37 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
38 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
39 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
40 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
41 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
42 #endif
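/*
   Illustrative sketch (not part of the build): with PETSC_USE_MAT_SINGLE the public
   routines below copy the caller's PetscScalar values into MatScalar (single precision)
   storage before forwarding to the ..._MatScalar() variants, roughly

       for (i=0; i<N; i++) vsingle[i] = v[i];                      /* PetscScalar -> MatScalar */
       ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);

   Without PETSC_USE_MAT_SINGLE the two scalar types coincide, the macros above simply
   alias the names, and no copy is made.
*/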
43 
44 EXTERN_C_BEGIN
45 #undef __FUNCT__
46 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
47 PetscErrorCode MatStoreValues_MPISBAIJ(Mat mat)
48 {
49   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
50   PetscErrorCode ierr;
51 
52   PetscFunctionBegin;
53   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
54   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
55   PetscFunctionReturn(0);
56 }
57 EXTERN_C_END
58 
59 EXTERN_C_BEGIN
60 #undef __FUNCT__
61 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
62 PetscErrorCode MatRetrieveValues_MPISBAIJ(Mat mat)
63 {
64   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
65   PetscErrorCode ierr;
66 
67   PetscFunctionBegin;
68   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
69   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
70   PetscFunctionReturn(0);
71 }
72 EXTERN_C_END
73 
74 
75 #define CHUNKSIZE  10
76 
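/*
   The two macros below insert a single scalar into the local SeqSBAIJ (A) or SeqBAIJ (B)
   block-CSR structure.  The block column is located by a short binary search that narrows
   [low,high) while high-low > 3, followed by a linear scan; if the block is absent and new
   nonzeros are allowed, the row arrays are grown by CHUNKSIZE block entries in a single
   reallocation.  A minimal sketch of the search step, assuming rp[] holds the sorted block
   column indices of the current block row:

       low = 0; high = nrow;
       while (high-low > 3) {
         t = (low+high)/2;
         if (rp[t] > bcol) high = t;
         else              low  = t;
       }
       for (_i=low; _i<high; _i++) {
         if (rp[_i] == bcol) { ... value found, add or overwrite ... }
       }
*/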
77 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
78 { \
79  \
80     brow = row/bs;  \
81     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
82     rmax = aimax[brow]; nrow = ailen[brow]; \
83       bcol = col/bs; \
84       ridx = row % bs; cidx = col % bs; \
85       low = 0; high = nrow; \
86       while (high-low > 3) { \
87         t = (low+high)/2; \
88         if (rp[t] > bcol) high = t; \
89         else              low  = t; \
90       } \
91       for (_i=low; _i<high; _i++) { \
92         if (rp[_i] > bcol) break; \
93         if (rp[_i] == bcol) { \
94           bap  = ap +  bs2*_i + bs*cidx + ridx; \
95           if (addv == ADD_VALUES) *bap += value;  \
96           else                    *bap  = value;  \
97           goto a_noinsert; \
98         } \
99       } \
100       if (a->nonew == 1) goto a_noinsert; \
101       else if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
102       if (nrow >= rmax) { \
103         /* there is no extra room in row, therefore enlarge */ \
104         PetscInt       new_nz = ai[a->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
105         MatScalar *new_a; \
106  \
107         if (a->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
108  \
109         /* malloc new storage space */ \
110         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(a->mbs+1)*sizeof(PetscInt); \
111         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
112         new_j = (PetscInt*)(new_a + bs2*new_nz); \
113         new_i = new_j + new_nz; \
114  \
115         /* copy over old data into new slots */ \
116         for (ii=0; ii<brow+1; ii++) {new_i[ii] = ai[ii];} \
117         for (ii=brow+1; ii<a->mbs+1; ii++) {new_i[ii] = ai[ii]+CHUNKSIZE;} \
118         ierr = PetscMemcpy(new_j,aj,(ai[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
119         len = (new_nz - CHUNKSIZE - ai[brow] - nrow); \
120         ierr = PetscMemcpy(new_j+ai[brow]+nrow+CHUNKSIZE,aj+ai[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
121         ierr = PetscMemcpy(new_a,aa,(ai[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
122         ierr = PetscMemzero(new_a+bs2*(ai[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
123         ierr = PetscMemcpy(new_a+bs2*(ai[brow]+nrow+CHUNKSIZE), \
124                     aa+bs2*(ai[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
125         /* free up old matrix storage */ \
126         ierr = PetscFree(a->a);CHKERRQ(ierr);  \
127         if (!a->singlemalloc) { \
128           ierr = PetscFree(a->i);CHKERRQ(ierr); \
129           ierr = PetscFree(a->j);CHKERRQ(ierr);\
130         } \
131         aa = a->a = new_a; ai = a->i = new_i; aj = a->j = new_j;  \
132         a->singlemalloc = PETSC_TRUE; \
133  \
134         rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
135         rmax = aimax[brow] = aimax[brow] + CHUNKSIZE; \
136         ierr = PetscLogObjectMemory(A,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
137         a->maxnz += bs2*CHUNKSIZE; \
138         a->reallocs++; \
139         a->nz++; \
140       } \
141       N = nrow++ - 1;  \
142       /* shift up all the later entries in this row */ \
143       for (ii=N; ii>=_i; ii--) { \
144         rp[ii+1] = rp[ii]; \
145         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
146       } \
147       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
148       rp[_i]                      = bcol;  \
149       ap[bs2*_i + bs*cidx + ridx] = value;  \
150       a_noinsert:; \
151     ailen[brow] = nrow; \
152 }
153 #ifndef MatSetValues_SeqSBAIJ_B_Private
154 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
155 { \
156     brow = row/bs;  \
157     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
158     rmax = bimax[brow]; nrow = bilen[brow]; \
159       bcol = col/bs; \
160       ridx = row % bs; cidx = col % bs; \
161       low = 0; high = nrow; \
162       while (high-low > 3) { \
163         t = (low+high)/2; \
164         if (rp[t] > bcol) high = t; \
165         else              low  = t; \
166       } \
167       for (_i=low; _i<high; _i++) { \
168         if (rp[_i] > bcol) break; \
169         if (rp[_i] == bcol) { \
170           bap  = ap +  bs2*_i + bs*cidx + ridx; \
171           if (addv == ADD_VALUES) *bap += value;  \
172           else                    *bap  = value;  \
173           goto b_noinsert; \
174         } \
175       } \
176       if (b->nonew == 1) goto b_noinsert; \
177       else if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
178       if (nrow >= rmax) { \
179         /* there is no extra room in row, therefore enlarge */ \
180         PetscInt  new_nz = bi[b->mbs] + CHUNKSIZE,len,*new_i,*new_j; \
181         MatScalar *new_a; \
182  \
183         if (b->nonew == -2) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) in the matrix", row, col); \
184  \
185         /* malloc new storage space */ \
186         len   = new_nz*(sizeof(PetscInt)+bs2*sizeof(MatScalar))+(b->mbs+1)*sizeof(PetscInt); \
187         ierr  = PetscMalloc(len,&new_a);CHKERRQ(ierr); \
188         new_j = (PetscInt*)(new_a + bs2*new_nz); \
189         new_i = new_j + new_nz; \
190  \
191         /* copy over old data into new slots */ \
192         for (ii=0; ii<brow+1; ii++) {new_i[ii] = bi[ii];} \
193         for (ii=brow+1; ii<b->mbs+1; ii++) {new_i[ii] = bi[ii]+CHUNKSIZE;} \
194         ierr = PetscMemcpy(new_j,bj,(bi[brow]+nrow)*sizeof(PetscInt));CHKERRQ(ierr); \
195         len  = (new_nz - CHUNKSIZE - bi[brow] - nrow); \
196         ierr = PetscMemcpy(new_j+bi[brow]+nrow+CHUNKSIZE,bj+bi[brow]+nrow,len*sizeof(PetscInt));CHKERRQ(ierr); \
197         ierr = PetscMemcpy(new_a,ba,(bi[brow]+nrow)*bs2*sizeof(MatScalar));CHKERRQ(ierr); \
198         ierr = PetscMemzero(new_a+bs2*(bi[brow]+nrow),bs2*CHUNKSIZE*sizeof(MatScalar));CHKERRQ(ierr); \
199         ierr = PetscMemcpy(new_a+bs2*(bi[brow]+nrow+CHUNKSIZE), \
200                     ba+bs2*(bi[brow]+nrow),bs2*len*sizeof(MatScalar));CHKERRQ(ierr);  \
201         /* free up old matrix storage */ \
202         ierr = PetscFree(b->a);CHKERRQ(ierr);  \
203         if (!b->singlemalloc) { \
204           ierr = PetscFree(b->i);CHKERRQ(ierr); \
205           ierr = PetscFree(b->j);CHKERRQ(ierr); \
206         } \
207         ba = b->a = new_a; bi = b->i = new_i; bj = b->j = new_j;  \
208         b->singlemalloc = PETSC_TRUE; \
209  \
210         rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
211         rmax = bimax[brow] = bimax[brow] + CHUNKSIZE; \
212         ierr = PetscLogObjectMemory(B,CHUNKSIZE*(sizeof(PetscInt) + bs2*sizeof(MatScalar)));CHKERRQ(ierr); \
213         b->maxnz += bs2*CHUNKSIZE; \
214         b->reallocs++; \
215         b->nz++; \
216       } \
217       N = nrow++ - 1;  \
218       /* shift up all the later entries in this row */ \
219       for (ii=N; ii>=_i; ii--) { \
220         rp[ii+1] = rp[ii]; \
221         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
222       } \
223       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
224       rp[_i]                      = bcol;  \
225       ap[bs2*_i + bs*cidx + ridx] = value;  \
226       b_noinsert:; \
227     bilen[brow] = nrow; \
228 }
229 #endif
230 
231 #if defined(PETSC_USE_MAT_SINGLE)
232 #undef __FUNCT__
233 #define __FUNCT__ "MatSetValues_MPISBAIJ"
234 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
235 {
236   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
237   PetscErrorCode ierr;
238   PetscInt       i,N = m*n;
239   MatScalar      *vsingle;
240 
241   PetscFunctionBegin;
242   if (N > b->setvalueslen) {
243     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
244     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
245     b->setvalueslen  = N;
246   }
247   vsingle = b->setvaluescopy;
248 
249   for (i=0; i<N; i++) {
250     vsingle[i] = v[i];
251   }
252   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
253   PetscFunctionReturn(0);
254 }
255 
256 #undef __FUNCT__
257 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
258 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
259 {
260   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
261   PetscErrorCode ierr;
262   PetscInt       i,N = m*n*b->bs2;
263   MatScalar      *vsingle;
264 
265   PetscFunctionBegin;
266   if (N > b->setvalueslen) {
267     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
268     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
269     b->setvalueslen  = N;
270   }
271   vsingle = b->setvaluescopy;
272   for (i=0; i<N; i++) {
273     vsingle[i] = v[i];
274   }
275   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
276   PetscFunctionReturn(0);
277 }
278 #endif
279 
280 /* Only add/insert a(i,j) with i<=j (blocks).
281    Any a(i,j) with i>j input by the user is ignored.
282 */
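/*
   Usage sketch (assumed caller code, not part of this file): because only the upper
   triangle is stored, the caller may pass a full symmetric stencil and the strictly
   lower-triangular blocks are silently dropped, e.g. with bs = 1

       cols[0] = i-1; cols[1] = i; cols[2] = i+1;   / * vals[0] targets (i,i-1) and is ignored * /
       ierr = MatSetValues(mat,1,&i,3,cols,vals,INSERT_VALUES);CHKERRQ(ierr);

   Entries owned by other processes are stashed here and exchanged later during
   MatAssemblyBegin()/MatAssemblyEnd().
*/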
283 #undef __FUNCT__
284 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
285 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
286 {
287   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
288   MatScalar      value;
289   PetscTruth     roworiented = baij->roworiented;
290   PetscErrorCode ierr;
291   PetscInt       i,j,row,col;
292   PetscInt       rstart_orig=baij->rstart_bs;
293   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
294   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
295 
296   /* Some Variables required in the macro */
297   Mat            A = baij->A;
298   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
299   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
300   MatScalar      *aa=a->a;
301 
302   Mat            B = baij->B;
303   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
304   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
305   MatScalar     *ba=b->a;
306 
307   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
308   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
309   MatScalar     *ap,*bap;
310 
311   /* for stash */
312   PetscInt      n_loc, *in_loc=0;
313   MatScalar     *v_loc=0;
314 
315   PetscFunctionBegin;
316 
317   if(!baij->donotstash){
318     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
319     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
320   }
321 
322   for (i=0; i<m; i++) {
323     if (im[i] < 0) continue;
324 #if defined(PETSC_USE_DEBUG)
325     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
326 #endif
327     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
328       row = im[i] - rstart_orig;              /* local row index */
329       for (j=0; j<n; j++) {
330         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
331         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
332           col = in[j] - cstart_orig;          /* local col index */
333           brow = row/bs; bcol = col/bs;
334           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
335           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
336           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
337           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
338         } else if (in[j] < 0) continue;
339 #if defined(PETSC_USE_DEBUG)
340         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
341 #endif
342         else {  /* off-diag entry (B) */
343           if (mat->was_assembled) {
344             if (!baij->colmap) {
345               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
346             }
347 #if defined (PETSC_USE_CTABLE)
348             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
349             col  = col - 1;
350 #else
351             col = baij->colmap[in[j]/bs] - 1;
352 #endif
353             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
354               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
355               col =  in[j];
356               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
357               B = baij->B;
358               b = (Mat_SeqBAIJ*)(B)->data;
359               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
360               ba=b->a;
361             } else col += in[j]%bs;
362           } else col = in[j];
363           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
364           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
365           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
366         }
367       }
368     } else {  /* off processor entry */
369       if (!baij->donotstash) {
370         n_loc = 0;
371         for (j=0; j<n; j++){
372           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
373           in_loc[n_loc] = in[j];
374           if (roworiented) {
375             v_loc[n_loc] = v[i*n+j];
376           } else {
377             v_loc[n_loc] = v[j*m+i];
378           }
379           n_loc++;
380         }
381         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
382       }
383     }
384   }
385 
386   if(!baij->donotstash){
387     ierr = PetscFree(in_loc);CHKERRQ(ierr);
388     ierr = PetscFree(v_loc);CHKERRQ(ierr);
389   }
390   PetscFunctionReturn(0);
391 }
392 
393 #undef __FUNCT__
394 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
395 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
396 {
397   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
398   const MatScalar *value;
399   MatScalar       *barray=baij->barray;
400   PetscTruth      roworiented = baij->roworiented;
401   PetscErrorCode  ierr;
402   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
403   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
404   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
405 
406   PetscFunctionBegin;
407   if(!barray) {
408     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
409     baij->barray = barray;
410   }
411 
412   if (roworiented) {
413     stepval = (n-1)*bs;
414   } else {
415     stepval = (m-1)*bs;
416   }
417   for (i=0; i<m; i++) {
418     if (im[i] < 0) continue;
419 #if defined(PETSC_USE_DEBUG)
420     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
421 #endif
422     if (im[i] >= rstart && im[i] < rend) {
423       row = im[i] - rstart;
424       for (j=0; j<n; j++) {
425         /* If NumCol = 1 then a copy is not required */
426         if ((roworiented) && (n == 1)) {
427           barray = (MatScalar*) v + i*bs2;
428         } else if((!roworiented) && (m == 1)) {
429           barray = (MatScalar*) v + j*bs2;
430         } else { /* Here a copy is required */
431           if (roworiented) {
432             value = v + i*(stepval+bs)*bs + j*bs;
433           } else {
434             value = v + j*(stepval+bs)*bs + i*bs;
435           }
436           for (ii=0; ii<bs; ii++,value+=stepval) {
437             for (jj=0; jj<bs; jj++) {
438               *barray++  = *value++;
439             }
440           }
441           barray -=bs2;
442         }
443 
444         if (in[j] >= cstart && in[j] < cend){
445           col  = in[j] - cstart;
446           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
447         }
448         else if (in[j] < 0) continue;
449 #if defined(PETSC_USE_DEBUG)
450         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
451 #endif
452         else {
453           if (mat->was_assembled) {
454             if (!baij->colmap) {
455               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
456             }
457 
458 #if defined(PETSC_USE_DEBUG)
459 #if defined (PETSC_USE_CTABLE)
460             { PetscInt data;
461               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
462               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
463             }
464 #else
465             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
466 #endif
467 #endif
468 #if defined (PETSC_USE_CTABLE)
469 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
470             col  = (col - 1)/bs;
471 #else
472             col = (baij->colmap[in[j]] - 1)/bs;
473 #endif
474             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
475               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
476               col =  in[j];
477             }
478           }
479           else col = in[j];
480           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
481         }
482       }
483     } else {
484       if (!baij->donotstash) {
485         if (roworiented) {
486           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
487         } else {
488           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
489         }
490       }
491     }
492   }
493   PetscFunctionReturn(0);
494 }
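/*
   Layout note for MatSetValuesBlocked_MPISBAIJ_MatScalar(): with row-oriented input the
   bs2 values of block (im[i],in[j]) live in an (m*bs) x (n*bs) row-major array, so each of
   the bs rows of a block is contiguous and consecutive block rows are separated by
   stepval = (n-1)*bs entries.  A small sketch, assuming bs = 2, m = 1, n = 2 (v holds
   2 rows and 4 columns, row-major):

       block j=0:  v[0],v[1]   and   v[4],v[5]
       block j=1:  v[2],v[3]   and   v[6],v[7]

   The column-oriented case is analogous with the roles of m and n exchanged.
*/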
495 
496 #undef __FUNCT__
497 #define __FUNCT__ "MatGetValues_MPISBAIJ"
498 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
499 {
500   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
501   PetscErrorCode ierr;
502   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
503   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
504 
505   PetscFunctionBegin;
506   for (i=0; i<m; i++) {
507     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
508     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
509     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
510       row = idxm[i] - bsrstart;
511       for (j=0; j<n; j++) {
512         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
513         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
514         if (idxn[j] >= bscstart && idxn[j] < bscend){
515           col = idxn[j] - bscstart;
516           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
517         } else {
518           if (!baij->colmap) {
519             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
520           }
521 #if defined (PETSC_USE_CTABLE)
522           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
523           data --;
524 #else
525           data = baij->colmap[idxn[j]/bs]-1;
526 #endif
527           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
528           else {
529             col  = data + idxn[j]%bs;
530             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
531           }
532         }
533       }
534     } else {
535       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
536     }
537   }
538  PetscFunctionReturn(0);
539 }
540 
541 #undef __FUNCT__
542 #define __FUNCT__ "MatNorm_MPISBAIJ"
543 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
544 {
545   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
546   PetscErrorCode ierr;
547   PetscReal      sum[2],*lnorm2;
548 
549   PetscFunctionBegin;
550   if (baij->size == 1) {
551     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
552   } else {
553     if (type == NORM_FROBENIUS) {
554       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
555       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
556       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
557       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
558       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
559       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
560       *norm = sqrt(sum[0] + 2*sum[1]); /* off-diagonal part B appears twice in the symmetric matrix */
561       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
562     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
563       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
564       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
565       PetscReal    *rsum,*rsum2,vabs;
566       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
567       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
568       MatScalar    *v;
569 
570       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
571       rsum2 = rsum + mat->N;
572       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
573       /* Amat */
574       v = amat->a; jj = amat->j;
575       for (brow=0; brow<mbs; brow++) {
576         grow = bs*(rstart + brow);
577         nz = amat->i[brow+1] - amat->i[brow];
578         for (bcol=0; bcol<nz; bcol++){
579           gcol = bs*(rstart + *jj); jj++;
580           for (col=0; col<bs; col++){
581             for (row=0; row<bs; row++){
582               vabs = PetscAbsScalar(*v); v++;
583               rsum[gcol+col] += vabs;
584               /* non-diagonal block */
585               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
586             }
587           }
588         }
589       }
590       /* Bmat */
591       v = bmat->a; jj = bmat->j;
592       for (brow=0; brow<mbs; brow++) {
593         grow = bs*(rstart + brow);
594         nz = bmat->i[brow+1] - bmat->i[brow];
595         for (bcol=0; bcol<nz; bcol++){
596           gcol = bs*garray[*jj]; jj++;
597           for (col=0; col<bs; col++){
598             for (row=0; row<bs; row++){
599               vabs = PetscAbsScalar(*v); v++;
600               rsum[gcol+col] += vabs;
601               rsum[grow+row] += vabs;
602             }
603           }
604         }
605       }
606       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
607       *norm = 0.0;
608       for (col=0; col<mat->N; col++) {
609         if (rsum2[col] > *norm) *norm = rsum2[col];
610       }
611       ierr = PetscFree(rsum);CHKERRQ(ierr);
612     } else {
613       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
614     }
615   }
616   PetscFunctionReturn(0);
617 }
618 
619 #undef __FUNCT__
620 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
621 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
622 {
623   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
624   PetscErrorCode ierr;
625   PetscInt       nstash,reallocs;
626   InsertMode     addv;
627 
628   PetscFunctionBegin;
629   if (baij->donotstash) {
630     PetscFunctionReturn(0);
631   }
632 
633   /* make sure all processors are either in INSERTMODE or ADDMODE */
634   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
635   if (addv == (ADD_VALUES|INSERT_VALUES)) {
636     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
637   }
638   mat->insertmode = addv; /* in case this processor had no cache */
639 
640   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
641   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
642   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
643   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
644   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
645   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
646   PetscFunctionReturn(0);
647 }
648 
649 #undef __FUNCT__
650 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
651 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
652 {
653   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
654   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
655   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
656   PetscErrorCode ierr;
657   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
658   PetscInt       *row,*col,other_disassembled;
659   PetscMPIInt    n;
660   PetscTruth     r1,r2,r3;
661   MatScalar      *val;
662   InsertMode     addv = mat->insertmode;
663 
664   PetscFunctionBegin;
665 
666   if (!baij->donotstash) {
667     while (1) {
668       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
669       if (!flg) break;
670 
671       for (i=0; i<n;) {
672         /* Now identify the consecutive vals belonging to the same row */
673         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
674         if (j < n) ncols = j-i;
675         else       ncols = n-i;
676         /* Now assemble all these values with a single function call */
677         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
678         i = j;
679       }
680     }
681     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
682     /* Now process the block-stash. Since the values are stashed column-oriented,
683        temporarily clear the roworiented flags, then restore the original flags
684        after the MatSetValues() calls */
685     r1 = baij->roworiented;
686     r2 = a->roworiented;
687     r3 = b->roworiented;
688     baij->roworiented = PETSC_FALSE;
689     a->roworiented    = PETSC_FALSE;
690     b->roworiented    = PETSC_FALSE;
691     while (1) {
692       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
693       if (!flg) break;
694 
695       for (i=0; i<n;) {
696         /* Now identify the consecutive vals belonging to the same row */
697         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
698         if (j < n) ncols = j-i;
699         else       ncols = n-i;
700         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
701         i = j;
702       }
703     }
704     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
705     baij->roworiented = r1;
706     a->roworiented    = r2;
707     b->roworiented    = r3;
708   }
709 
710   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
711   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
712 
713   /* determine if any processor has disassembled, if so we must
714      also disassemble ourselves, so that we may reassemble. */
715   /*
716      if nonzero structure of submatrix B cannot change then we know that
717      no processor disassembled thus we can skip this stuff
718   */
719   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
720     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
721     if (mat->was_assembled && !other_disassembled) {
722       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
723     }
724   }
725 
726   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
727     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
728   }
729   b->compressedrow.use = PETSC_TRUE;
730   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
731   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
732 
733   if (baij->rowvalues) {
734     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
735     baij->rowvalues = 0;
736   }
737 
738   PetscFunctionReturn(0);
739 }
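/*
   Typical assembly sequence seen from the caller (a sketch of assumed user code using only
   the public interface):

       ierr = MatSetValues(mat,1,&row,ncols,cols,vals,ADD_VALUES);CHKERRQ(ierr);   / * repeated * /
       ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
       ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);

   MatAssemblyBegin_MPISBAIJ() starts the scatter of stashed off-process values;
   MatAssemblyEnd_MPISBAIJ() above drains both the scalar stash and the block stash and
   then assembles the local A (diagonal) and B (off-diagonal) pieces.
*/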
740 
741 #undef __FUNCT__
742 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
743 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
744 {
745   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
746   PetscErrorCode    ierr;
747   PetscInt          bs = mat->bs;
748   PetscMPIInt       size = baij->size,rank = baij->rank;
749   PetscTruth        iascii,isdraw;
750   PetscViewer       sviewer;
751   PetscViewerFormat format;
752 
753   PetscFunctionBegin;
754   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
755   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
756   if (iascii) {
757     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
758     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
759       MatInfo info;
760       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
761       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
762       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
763               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
764               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
765       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
766       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
767       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
768       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
769       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
770       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
771       PetscFunctionReturn(0);
772     } else if (format == PETSC_VIEWER_ASCII_INFO) {
773       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
774       PetscFunctionReturn(0);
775     }
776   }
777 
778   if (isdraw) {
779     PetscDraw       draw;
780     PetscTruth isnull;
781     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
782     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
783   }
784 
785   if (size == 1) {
786     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
787     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
788   } else {
789     /* assemble the entire matrix onto the first processor. */
790     Mat         A;
791     Mat_SeqSBAIJ *Aloc;
792     Mat_SeqBAIJ *Bloc;
793     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
794     MatScalar   *a;
795 
796     /* Should this be the same type as mat? */
797     if (!rank) {
798       ierr = MatCreate(mat->comm,M,N,M,N,&A);CHKERRQ(ierr);
799     } else {
800       ierr = MatCreate(mat->comm,0,0,M,N,&A);CHKERRQ(ierr);
801     }
802     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
803     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
804     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
805 
806     /* copy over the A part */
807     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
808     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
809     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
810 
811     for (i=0; i<mbs; i++) {
812       rvals[0] = bs*(baij->rstart + i);
813       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
814       for (j=ai[i]; j<ai[i+1]; j++) {
815         col = (baij->cstart+aj[j])*bs;
816         for (k=0; k<bs; k++) {
817           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
818           col++; a += bs;
819         }
820       }
821     }
822     /* copy over the B part */
823     Bloc = (Mat_SeqBAIJ*)baij->B->data;
824     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
825     for (i=0; i<mbs; i++) {
826       rvals[0] = bs*(baij->rstart + i);
827       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
828       for (j=ai[i]; j<ai[i+1]; j++) {
829         col = baij->garray[aj[j]]*bs;
830         for (k=0; k<bs; k++) {
831           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
832           col++; a += bs;
833         }
834       }
835     }
836     ierr = PetscFree(rvals);CHKERRQ(ierr);
837     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
838     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
839     /*
840        Everyone has to call to draw the matrix since the graphics waits are
841        synchronized across all processors that share the PetscDraw object
842     */
843     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
844     if (!rank) {
845       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
846       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
847     }
848     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
849     ierr = MatDestroy(A);CHKERRQ(ierr);
850   }
851   PetscFunctionReturn(0);
852 }
853 
854 #undef __FUNCT__
855 #define __FUNCT__ "MatView_MPISBAIJ"
856 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
857 {
858   PetscErrorCode ierr;
859   PetscTruth     iascii,isdraw,issocket,isbinary;
860 
861   PetscFunctionBegin;
862   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
863   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
864   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
865   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
866   if (iascii || isdraw || issocket || isbinary) {
867     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
868   } else {
869     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
870   }
871   PetscFunctionReturn(0);
872 }
873 
874 #undef __FUNCT__
875 #define __FUNCT__ "MatDestroy_MPISBAIJ"
876 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
877 {
878   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
879   PetscErrorCode ierr;
880 
881   PetscFunctionBegin;
882 #if defined(PETSC_USE_LOG)
883   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
884 #endif
885   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
886   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
887   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
888   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
889   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
890 #if defined (PETSC_USE_CTABLE)
891   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
892 #else
893   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
894 #endif
895   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
896   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
897   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
898   if (baij->slvec0) {
899     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
900     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
901   }
902   if (baij->slvec1) {
903     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
904     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
905     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
906   }
907   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
908   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
909   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
910   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
911 #if defined(PETSC_USE_MAT_SINGLE)
912   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
913 #endif
914   ierr = PetscFree(baij);CHKERRQ(ierr);
915 
916   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
917   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
918   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
919   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
920   PetscFunctionReturn(0);
921 }
922 
923 #undef __FUNCT__
924 #define __FUNCT__ "MatMult_MPISBAIJ"
925 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
926 {
927   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
928   PetscErrorCode ierr;
929   PetscInt       nt,mbs=a->mbs,bs=A->bs;
930   PetscScalar    *x,*from,zero=0.0;
931 
932   PetscFunctionBegin;
933   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
934   if (nt != A->n) {
935     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
936   }
937   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
938   if (nt != A->m) {
939     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
940   }
941 
942   /* diagonal part */
943   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
944   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
945 
946   /* subdiagonal part */
947   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
948 
949   /* copy x into the vec slvec0 */
950   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
951   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
952   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
953   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
954 
955   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
956   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
957   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
958 
959   /* superdiagonal part */
960   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
961 
962   PetscFunctionReturn(0);
963 }
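/*
   Note on the scheme above: only the upper triangle is stored locally, so y = A*x is
   assembled as

       y = A_d*x_loc + B*x_ghost + (B^T*x_loc contributions received from other processes)

   where A_d = a->A is the diagonal block and B = a->B the off-diagonal part.  slvec0 packs
   [x_loc ; B^T*x_loc], a single forward scatter with ADD_VALUES accumulates it into
   slvec1 = [A_d*x_loc plus remote B^T pieces ; x_ghost], and the final multadd adds
   B*x_ghost into yy.
*/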
964 
965 #undef __FUNCT__
966 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
967 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
968 {
969   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
970   PetscErrorCode ierr;
971   PetscInt       nt;
972 
973   PetscFunctionBegin;
974   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
975   if (nt != A->n) {
976     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
977   }
978   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
979   if (nt != A->m) {
980     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
981   }
982 
983   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
984   /* do diagonal part */
985   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
986   /* do superdiagonal part */
987   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
988   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
989   /* do subdiagonal part */
990   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
991   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
992   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
993 
994   PetscFunctionReturn(0);
995 }
996 
997 #undef __FUNCT__
998 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
999 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1000 {
1001   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1002   PetscErrorCode ierr;
1003   PetscInt       mbs=a->mbs,bs=A->bs;
1004   PetscScalar    *x,*from,zero=0.0;
1005 
1006   PetscFunctionBegin;
1007   /*
1008   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
1009   PetscSynchronizedFlush(A->comm);
1010   */
1011   /* diagonal part */
1012   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
1013   ierr = VecSet(&zero,a->slvec1b);CHKERRQ(ierr);
1014 
1015   /* subdiagonal part */
1016   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
1017 
1018   /* copy x into the vec slvec0 */
1019   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
1020   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
1021   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
1022   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
1023 
1024   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1025   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
1026   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
1027 
1028   /* superdiagonal part */
1029   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
1030 
1031   PetscFunctionReturn(0);
1032 }
1033 
1034 #undef __FUNCT__
1035 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
1036 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
1037 {
1038   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1039   PetscErrorCode ierr;
1040 
1041   PetscFunctionBegin;
1042   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1043   /* do diagonal part */
1044   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
1045   /* do superdiagonal part */
1046   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
1047   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
1048 
1049   /* do subdiagonal part */
1050   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
1051   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1052   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
1053 
1054   PetscFunctionReturn(0);
1055 }
1056 
1057 #undef __FUNCT__
1058 #define __FUNCT__ "MatMultTranspose_MPISBAIJ"
1059 PetscErrorCode MatMultTranspose_MPISBAIJ(Mat A,Vec xx,Vec yy)
1060 {
1061   PetscErrorCode ierr;
1062 
1063   PetscFunctionBegin;
1064   ierr = MatMult(A,xx,yy);CHKERRQ(ierr);
1065   PetscFunctionReturn(0);
1066 }
1067 
1068 #undef __FUNCT__
1069 #define __FUNCT__ "MatMultTransposeAdd_MPISBAIJ"
1070 PetscErrorCode MatMultTransposeAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
1071 {
1072   PetscErrorCode ierr;
1073 
1074   PetscFunctionBegin;
1075   ierr = MatMultAdd(A,xx,yy,zz);CHKERRQ(ierr);
1076   PetscFunctionReturn(0);
1077 }
1078 
1079 /*
1080   This only works correctly for square matrices where the subblock A->A is the
1081    diagonal block
1082 */
1083 #undef __FUNCT__
1084 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1085 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1086 {
1087   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1088   PetscErrorCode ierr;
1089 
1090   PetscFunctionBegin;
1091   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1092   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1093   PetscFunctionReturn(0);
1094 }
1095 
1096 #undef __FUNCT__
1097 #define __FUNCT__ "MatScale_MPISBAIJ"
1098 PetscErrorCode MatScale_MPISBAIJ(const PetscScalar *aa,Mat A)
1099 {
1100   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1101   PetscErrorCode ierr;
1102 
1103   PetscFunctionBegin;
1104   ierr = MatScale(aa,a->A);CHKERRQ(ierr);
1105   ierr = MatScale(aa,a->B);CHKERRQ(ierr);
1106   PetscFunctionReturn(0);
1107 }
1108 
1109 #undef __FUNCT__
1110 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1111 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1112 {
1113   PetscFunctionBegin;
1114   SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1115   PetscFunctionReturn(0);
1116 }
1117 
1118 #undef __FUNCT__
1119 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1120 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1121 {
1122   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1123 
1124   PetscFunctionBegin;
1125   if (!baij->getrowactive) {
1126     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1127   }
1128   baij->getrowactive = PETSC_FALSE;
1129   PetscFunctionReturn(0);
1130 }
1131 
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1134 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1135 {
1136   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1137   PetscErrorCode ierr;
1138 
1139   PetscFunctionBegin;
1140   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1141   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1147 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1148 {
1149   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1150   Mat            A = a->A,B = a->B;
1151   PetscErrorCode ierr;
1152   PetscReal      isend[5],irecv[5];
1153 
1154   PetscFunctionBegin;
1155   info->block_size     = (PetscReal)matin->bs;
1156   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1157   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1158   isend[3] = info->memory;  isend[4] = info->mallocs;
1159   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1160   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1161   isend[3] += info->memory;  isend[4] += info->mallocs;
1162   if (flag == MAT_LOCAL) {
1163     info->nz_used      = isend[0];
1164     info->nz_allocated = isend[1];
1165     info->nz_unneeded  = isend[2];
1166     info->memory       = isend[3];
1167     info->mallocs      = isend[4];
1168   } else if (flag == MAT_GLOBAL_MAX) {
1169     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1170     info->nz_used      = irecv[0];
1171     info->nz_allocated = irecv[1];
1172     info->nz_unneeded  = irecv[2];
1173     info->memory       = irecv[3];
1174     info->mallocs      = irecv[4];
1175   } else if (flag == MAT_GLOBAL_SUM) {
1176     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1177     info->nz_used      = irecv[0];
1178     info->nz_allocated = irecv[1];
1179     info->nz_unneeded  = irecv[2];
1180     info->memory       = irecv[3];
1181     info->mallocs      = irecv[4];
1182   } else {
1183     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1184   }
1185   info->rows_global       = (PetscReal)A->M;
1186   info->columns_global    = (PetscReal)A->N;
1187   info->rows_local        = (PetscReal)A->m;
1188   info->columns_local     = (PetscReal)A->N;
1189   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1190   info->fill_ratio_needed = 0;
1191   info->factor_mallocs    = 0;
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1197 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1198 {
1199   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1200   PetscErrorCode ierr;
1201 
1202   PetscFunctionBegin;
1203   switch (op) {
1204   case MAT_NO_NEW_NONZERO_LOCATIONS:
1205   case MAT_YES_NEW_NONZERO_LOCATIONS:
1206   case MAT_COLUMNS_UNSORTED:
1207   case MAT_COLUMNS_SORTED:
1208   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1209   case MAT_KEEP_ZEROED_ROWS:
1210   case MAT_NEW_NONZERO_LOCATION_ERR:
1211     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1212     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1213     break;
1214   case MAT_ROW_ORIENTED:
1215     a->roworiented = PETSC_TRUE;
1216     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1217     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1218     break;
1219   case MAT_ROWS_SORTED:
1220   case MAT_ROWS_UNSORTED:
1221   case MAT_YES_NEW_DIAGONALS:
1222     ierr = PetscLogInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
1223     break;
1224   case MAT_COLUMN_ORIENTED:
1225     a->roworiented = PETSC_FALSE;
1226     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1227     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1228     break;
1229   case MAT_IGNORE_OFF_PROC_ENTRIES:
1230     a->donotstash = PETSC_TRUE;
1231     break;
1232   case MAT_NO_NEW_DIAGONALS:
1233     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1234   case MAT_USE_HASH_TABLE:
1235     a->ht_flag = PETSC_TRUE;
1236     break;
1237   case MAT_NOT_SYMMETRIC:
1238   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1239   case MAT_HERMITIAN:
1240     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1241   case MAT_SYMMETRIC:
1242   case MAT_STRUCTURALLY_SYMMETRIC:
1243   case MAT_NOT_HERMITIAN:
1244   case MAT_SYMMETRY_ETERNAL:
1245   case MAT_NOT_SYMMETRY_ETERNAL:
1246     break;
1247   default:
1248     SETERRQ(PETSC_ERR_SUP,"unknown option");
1249   }
1250   PetscFunctionReturn(0);
1251 }
1252 
1253 #undef __FUNCT__
1254 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1255 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1256 {
1257   PetscErrorCode ierr;
1258   PetscFunctionBegin;
1259   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1260   PetscFunctionReturn(0);
1261 }
1262 
1263 #undef __FUNCT__
1264 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1265 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1266 {
1267   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1268   Mat            a=baij->A, b=baij->B;
1269   PetscErrorCode ierr;
1270   PetscInt       nv,m,n;
1271   PetscTruth     flg;
1272 
1273   PetscFunctionBegin;
1274   if (ll != rr){
1275     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1276     if (!flg)
1277       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1278   }
1279   if (!ll) PetscFunctionReturn(0);
1280 
1281   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1282   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1283 
1284   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1285   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1286 
1287   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1288 
1289   /* left diagonalscale the off-diagonal part */
1290   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1291 
1292   /* scale the diagonal part */
1293   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1294 
1295   /* right diagonalscale the off-diagonal part */
1296   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1297   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1303 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1304 {
1305   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1306   MPI_Comm          comm = A->comm;
1307   static PetscTruth called = PETSC_FALSE;
1308   PetscErrorCode    ierr;
1309 
1310   PetscFunctionBegin;
1311   if (!a->rank) {
1312     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1313   }
1314   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1315   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1316   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1317   PetscFunctionReturn(0);
1318 }
1319 
1320 #undef __FUNCT__
1321 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1322 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1323 {
1324   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1325   PetscErrorCode ierr;
1326 
1327   PetscFunctionBegin;
1328   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1329   PetscFunctionReturn(0);
1330 }
1331 
1332 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1333 
1334 #undef __FUNCT__
1335 #define __FUNCT__ "MatEqual_MPISBAIJ"
1336 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1337 {
1338   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1339   Mat            a,b,c,d;
1340   PetscTruth     flg;
1341   PetscErrorCode ierr;
1342 
1343   PetscFunctionBegin;
1344   a = matA->A; b = matA->B;
1345   c = matB->A; d = matB->B;
1346 
1347   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1348   if (flg) {
1349     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1350   }
1351   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1352   PetscFunctionReturn(0);
1353 }
1354 
1355 #undef __FUNCT__
1356 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1357 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1358 {
1359   PetscErrorCode ierr;
1360 
1361   PetscFunctionBegin;
1362   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1368 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1369 {
1370   PetscErrorCode ierr;
1371   PetscInt       i;
1372   PetscTruth     flg;
1373 
1374   PetscFunctionBegin;
1375   for (i=0; i<n; i++) {
1376     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1377     if (!flg) {
1378       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1379     }
1380   }
1381   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1382   PetscFunctionReturn(0);
1383 }
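
/*
   Illustrative usage sketch (hypothetical helper and index values, not part of the
   original source): because only symmetric submatrices can be extracted, the same
   index set is passed for both the rows and the columns of MatGetSubMatrices().
*/
#undef __FUNCT__
#define __FUNCT__ "MatGetSubMatrixSBAIJExample"
static PetscErrorCode MatGetSubMatrixSBAIJExample(Mat A,Mat **submats)
{
  PetscErrorCode ierr;
  PetscInt       idx[2] = {0,1};  /* made-up indices; index-set requirements follow MatGetSubMatrices_MPIBAIJ */
  IS             is;

  PetscFunctionBegin;
  ierr = ISCreateGeneral(PETSC_COMM_SELF,2,idx,&is);CHKERRQ(ierr);
  ierr = MatGetSubMatrices(A,1,&is,&is,MAT_INITIAL_MATRIX,submats);CHKERRQ(ierr);
  ierr = ISDestroy(is);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}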
1384 
1385 
1386 /* -------------------------------------------------------------------*/
1387 static struct _MatOps MatOps_Values = {
1388        MatSetValues_MPISBAIJ,
1389        MatGetRow_MPISBAIJ,
1390        MatRestoreRow_MPISBAIJ,
1391        MatMult_MPISBAIJ,
1392 /* 4*/ MatMultAdd_MPISBAIJ,
1393        MatMultTranspose_MPISBAIJ,
1394        MatMultTransposeAdd_MPISBAIJ,
1395        0,
1396        0,
1397        0,
1398 /*10*/ 0,
1399        0,
1400        0,
1401        MatRelax_MPISBAIJ,
1402        MatTranspose_MPISBAIJ,
1403 /*15*/ MatGetInfo_MPISBAIJ,
1404        MatEqual_MPISBAIJ,
1405        MatGetDiagonal_MPISBAIJ,
1406        MatDiagonalScale_MPISBAIJ,
1407        MatNorm_MPISBAIJ,
1408 /*20*/ MatAssemblyBegin_MPISBAIJ,
1409        MatAssemblyEnd_MPISBAIJ,
1410        0,
1411        MatSetOption_MPISBAIJ,
1412        MatZeroEntries_MPISBAIJ,
1413 /*25*/ 0,
1414        0,
1415        0,
1416        0,
1417        0,
1418 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1419        0,
1420        0,
1421        0,
1422        0,
1423 /*35*/ MatDuplicate_MPISBAIJ,
1424        0,
1425        0,
1426        0,
1427        0,
1428 /*40*/ 0,
1429        MatGetSubMatrices_MPISBAIJ,
1430        MatIncreaseOverlap_MPISBAIJ,
1431        MatGetValues_MPISBAIJ,
1432        0,
1433 /*45*/ MatPrintHelp_MPISBAIJ,
1434        MatScale_MPISBAIJ,
1435        0,
1436        0,
1437        0,
1438 /*50*/ 0,
1439        0,
1440        0,
1441        0,
1442        0,
1443 /*55*/ 0,
1444        0,
1445        MatSetUnfactored_MPISBAIJ,
1446        0,
1447        MatSetValuesBlocked_MPISBAIJ,
1448 /*60*/ 0,
1449        0,
1450        0,
1451        MatGetPetscMaps_Petsc,
1452        0,
1453 /*65*/ 0,
1454        0,
1455        0,
1456        0,
1457        0,
1458 /*70*/ MatGetRowMax_MPISBAIJ,
1459        0,
1460        0,
1461        0,
1462        0,
1463 /*75*/ 0,
1464        0,
1465        0,
1466        0,
1467        0,
1468 /*80*/ 0,
1469        0,
1470        0,
1471        0,
1472        MatLoad_MPISBAIJ,
1473 /*85*/ 0,
1474        0,
1475        0,
1476        0,
1477        0,
1478 /*90*/ 0,
1479        0,
1480        0,
1481        0,
1482        0,
1483 /*95*/ 0,
1484        0,
1485        0,
1486        0};
1487 
1488 
1489 EXTERN_C_BEGIN
1490 #undef __FUNCT__
1491 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1492 PetscErrorCode MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1493 {
1494   PetscFunctionBegin;
1495   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1496   *iscopy = PETSC_FALSE;
1497   PetscFunctionReturn(0);
1498 }
1499 EXTERN_C_END
1500 
1501 EXTERN_C_BEGIN
1502 #undef __FUNCT__
1503 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1504 PetscErrorCode MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1505 {
1506   Mat_MPISBAIJ   *b;
1507   PetscErrorCode ierr;
1508   PetscInt       i,mbs,Mbs;
1509 
1510   PetscFunctionBegin;
1511   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1512 
1513   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1514   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1515   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1516   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1517   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1518   if (d_nnz) {
1519     for (i=0; i<B->m/bs; i++) {
1520       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1521     }
1522   }
1523   if (o_nnz) {
1524     for (i=0; i<B->m/bs; i++) {
1525       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1526     }
1527   }
1528   B->preallocated = PETSC_TRUE;
1529   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1530   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1531   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1532   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1533 
1534   b   = (Mat_MPISBAIJ*)B->data;
1535   mbs = B->m/bs;
1536   Mbs = B->M/bs;
1537   if (mbs*bs != B->m) {
1538     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1539   }
1540 
1541   B->bs  = bs;
1542   b->bs2 = bs*bs;
1543   b->mbs = mbs;
1544   b->nbs = mbs;
1545   b->Mbs = Mbs;
1546   b->Nbs = Mbs;
1547 
1548   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1549   b->rowners[0]    = 0;
1550   for (i=2; i<=b->size; i++) {
1551     b->rowners[i] += b->rowners[i-1];
1552   }
1553   b->rstart    = b->rowners[b->rank];
1554   b->rend      = b->rowners[b->rank+1];
1555   b->cstart    = b->rstart;
1556   b->cend      = b->rend;
1557   for (i=0; i<=b->size; i++) {
1558     b->rowners_bs[i] = b->rowners[i]*bs;
1559   }
1560   b->rstart_bs = b->rstart*bs;
1561   b->rend_bs   = b->rend*bs;
1562 
1563   b->cstart_bs = b->cstart*bs;
1564   b->cend_bs   = b->cend*bs;
1565 
1566   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->m,B->m,B->m,&b->A);CHKERRQ(ierr);
1567   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1568   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1569   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1570 
1571   ierr = MatCreate(PETSC_COMM_SELF,B->m,B->M,B->m,B->M,&b->B);CHKERRQ(ierr);
1572   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1573   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1574   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1575 
1576   /* build cache for off array entries formed */
1577   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1578 
1579   PetscFunctionReturn(0);
1580 }
1581 EXTERN_C_END
1582 
1583 /*MC
1584    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1585    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1586 
1587    Options Database Keys:
1588 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1589 
1590   Level: beginner
1591 
1592 .seealso: MatCreateMPISBAIJ()
1593 M*/
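
/*
   Illustrative sketch (hypothetical code and sizes, not part of the original source):
   the type is normally selected at runtime, e.g. with
       -mat_type mpisbaij -mat_block_size 2
   after a creation sequence such as the following.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateFromOptionsSBAIJExample"
static PetscErrorCode MatCreateFromOptionsSBAIJExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatCreate(comm,PETSC_DECIDE,PETSC_DECIDE,100,100,A);CHKERRQ(ierr);
  ierr = MatSetFromOptions(*A);CHKERRQ(ierr);  /* picks up -mat_type mpisbaij */
  PetscFunctionReturn(0);
}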
1594 
1595 EXTERN_C_BEGIN
1596 #undef __FUNCT__
1597 #define __FUNCT__ "MatCreate_MPISBAIJ"
1598 PetscErrorCode MatCreate_MPISBAIJ(Mat B)
1599 {
1600   Mat_MPISBAIJ   *b;
1601   PetscErrorCode ierr;
1602   PetscTruth     flg;
1603 
1604   PetscFunctionBegin;
1605 
1606   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1607   B->data = (void*)b;
1608   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1609 
1610   B->ops->destroy    = MatDestroy_MPISBAIJ;
1611   B->ops->view       = MatView_MPISBAIJ;
1612   B->mapping    = 0;
1613   B->factor     = 0;
1614   B->assembled  = PETSC_FALSE;
1615 
1616   B->insertmode = NOT_SET_VALUES;
1617   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1618   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1619 
1620   /* build local table of row and column ownerships */
1621   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1622   b->cowners    = b->rowners + b->size + 2;
1623   b->rowners_bs = b->cowners + b->size + 2;
1624   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1625 
1626   /* build cache for off array entries formed */
1627   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1628   b->donotstash  = PETSC_FALSE;
1629   b->colmap      = PETSC_NULL;
1630   b->garray      = PETSC_NULL;
1631   b->roworiented = PETSC_TRUE;
1632 
1633 #if defined(PETSC_USE_MAT_SINGLE)
1634   /* stuff for MatSetValues_XXX in single precision */
1635   b->setvalueslen     = 0;
1636   b->setvaluescopy    = PETSC_NULL;
1637 #endif
1638 
1639   /* stuff used in block assembly */
1640   b->barray       = 0;
1641 
1642   /* stuff used for matrix vector multiply */
1643   b->lvec         = 0;
1644   b->Mvctx        = 0;
1645   b->slvec0       = 0;
1646   b->slvec0b      = 0;
1647   b->slvec1       = 0;
1648   b->slvec1a      = 0;
1649   b->slvec1b      = 0;
1650   b->sMvctx       = 0;
1651 
1652   /* stuff for MatGetRow() */
1653   b->rowindices   = 0;
1654   b->rowvalues    = 0;
1655   b->getrowactive = PETSC_FALSE;
1656 
1657   /* hash table stuff */
1658   b->ht           = 0;
1659   b->hd           = 0;
1660   b->ht_size      = 0;
1661   b->ht_flag      = PETSC_FALSE;
1662   b->ht_fact      = 0;
1663   b->ht_total_ct  = 0;
1664   b->ht_insert_ct = 0;
1665 
1666   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1667   if (flg) {
1668     PetscReal fact = 1.39;
1669     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1670     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1671     if (fact <= 1.0) fact = 1.39;
1672     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1673     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1674   }
1675   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1676                                      "MatStoreValues_MPISBAIJ",
1677                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1678   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1679                                      "MatRetrieveValues_MPISBAIJ",
1680                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1681   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1682                                      "MatGetDiagonalBlock_MPISBAIJ",
1683                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1684   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1685                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1686                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1687   B->symmetric                  = PETSC_TRUE;
1688   B->structurally_symmetric     = PETSC_TRUE;
1689   B->symmetric_set              = PETSC_TRUE;
1690   B->structurally_symmetric_set = PETSC_TRUE;
1691   PetscFunctionReturn(0);
1692 }
1693 EXTERN_C_END
1694 
1695 /*MC
1696    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1697 
1698    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1699    and MATMPISBAIJ otherwise.
1700 
1701    Options Database Keys:
1702 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1703 
1704   Level: beginner
1705 
1706 .seealso: MatCreateMPISBAIJ(), MATSEQSBAIJ, MATMPISBAIJ
1707 M*/
1708 
1709 EXTERN_C_BEGIN
1710 #undef __FUNCT__
1711 #define __FUNCT__ "MatCreate_SBAIJ"
1712 PetscErrorCode MatCreate_SBAIJ(Mat A)
1713 {
1714   PetscErrorCode ierr;
1715   PetscMPIInt    size;
1716 
1717   PetscFunctionBegin;
1718   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1719   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1720   if (size == 1) {
1721     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1722   } else {
1723     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1724   }
1725   PetscFunctionReturn(0);
1726 }
1727 EXTERN_C_END
1728 
1729 #undef __FUNCT__
1730 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1731 /*@C
1732    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1733    the user should preallocate the matrix storage by setting the parameters
1734    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1735    performance can be increased by more than a factor of 50.
1736 
1737    Collective on Mat
1738 
1739    Input Parameters:
1740 +  A - the matrix
1741 .  bs   - size of block
1742 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1743            submatrix  (same for all local rows)
1744 .  d_nnz - array containing the number of block nonzeros in the various block rows
1745            in the upper triangular and diagonal part of the in diagonal portion of the local
1746            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1747            for the diagonal entry even if it is zero.
1748 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1749            submatrix (same for all local rows).
1750 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1751            off-diagonal portion of the local submatrix (possibly different for
1752            each block row) or PETSC_NULL.
1753 
1754 
1755    Options Database Keys:
1756 +   -mat_no_unroll - uses code that does not unroll the loops in the
1757                      block calculations (much slower)
1758 -   -mat_block_size - size of the blocks to use
1759 
1760    Notes:
1761 
1762    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1763    then it must be used on all processors that share the object for that argument.
1764 
1765    If the *_nnz parameter is given then the *_nz parameter is ignored
1766 
1767    Storage Information:
1768    For a square global matrix we define each processor's diagonal portion
1769    to be its local rows and the corresponding columns (a square submatrix);
1770    each processor's off-diagonal portion encompasses the remainder of the
1771    local matrix (a rectangular submatrix).
1772 
1773    The user can specify preallocated storage for the diagonal part of
1774    the local submatrix with either d_nz or d_nnz (not both).  Set
1775    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1776    memory allocation.  Likewise, specify preallocated storage for the
1777    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1778 
1779    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1780    the figure below we depict these three local rows and all columns (0-11).
1781 
1782 .vb
1783            0 1 2 3 4 5 6 7 8 9 10 11
1784           -------------------
1785    row 3  |  o o o d d d o o o o o o
1786    row 4  |  o o o d d d o o o o o o
1787    row 5  |  o o o d d d o o o o o o
1788           -------------------
1789 .ve
1790 
1791    Thus, any entries in the d locations are stored in the d (diagonal)
1792    submatrix, and any entries in the o locations are stored in the
1793    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1794    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1795 
1796    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1797    plus the diagonal part of the d matrix,
1798    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1799    In general, for PDE problems in which most nonzeros are near the diagonal,
1800    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1801    or you will get TERRIBLE performance; see the users' manual chapter on
1802    matrices.
1803 
1804    Level: intermediate
1805 
1806 .keywords: matrix, block, aij, compressed row, sparse, parallel
1807 
1808 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1809 @*/
1810 PetscErrorCode MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1811 {
1812   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1813 
1814   PetscFunctionBegin;
1815   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1816   if (f) {
1817     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1818   }
1819   PetscFunctionReturn(0);
1820 }
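
/*
   Illustrative preallocation sketch (hypothetical helper, not part of the original
   source) for the three local rows depicted in the storage diagram above, with bs=1.
   d_nnz counts the upper-triangular entries (including the diagonal) of the d block;
   o_nnz counts the entries in the columns to the right of the local columns.  The
   particular counts are made up for illustration.
*/
#undef __FUNCT__
#define __FUNCT__ "MatMPISBAIJPreallocationExample"
static PetscErrorCode MatMPISBAIJPreallocationExample(MPI_Comm comm,Mat *B)
{
  PetscErrorCode ierr;
  PetscInt       d_nnz[3] = {3,2,1};  /* dense upper triangle of the 3x3 d block */
  PetscInt       o_nnz[3] = {2,1,2};  /* hypothetical counts for the o columns */

  PetscFunctionBegin;
  ierr = MatCreate(comm,3,3,PETSC_DETERMINE,PETSC_DETERMINE,B);CHKERRQ(ierr);
  ierr = MatSetType(*B,MATMPISBAIJ);CHKERRQ(ierr);
  ierr = MatMPISBAIJSetPreallocation(*B,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
  /* ... MatSetValues() on the upper triangle, then MatAssemblyBegin/End() ... */
  PetscFunctionReturn(0);
}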
1821 
1822 #undef __FUNCT__
1823 #define __FUNCT__ "MatCreateMPISBAIJ"
1824 /*@C
1825    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1826    (block compressed row).  For good matrix assembly performance
1827    the user should preallocate the matrix storage by setting the parameters
1828    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1829    performance can be increased by more than a factor of 50.
1830 
1831    Collective on MPI_Comm
1832 
1833    Input Parameters:
1834 +  comm - MPI communicator
1835 .  bs   - size of block
1836 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1837            This value should be the same as the local size used in creating the
1838            y vector for the matrix-vector product y = Ax.
1839 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1840            This value should be the same as the local size used in creating the
1841            x vector for the matrix-vector product y = Ax.
1842 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1843 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1844 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1845            submatrix  (same for all local rows)
1846 .  d_nnz - array containing the number of block nonzeros in the various block rows
1847            in the upper triangular and diagonal part of the diagonal portion of the local submatrix
1848            (possibly different for each block row) or PETSC_NULL.
1849            You must leave room for the diagonal entry even if it is zero.
1850 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1851            submatrix (same for all local rows).
1852 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1853            off-diagonal portion of the local submatrix (possibly different for
1854            each block row) or PETSC_NULL.
1855 
1856    Output Parameter:
1857 .  A - the matrix
1858 
1859    Options Database Keys:
1860 +   -mat_no_unroll - uses code that does not unroll the loops in the
1861                      block calculations (much slower)
1862 .   -mat_block_size - size of the blocks to use
1863 -   -mat_mpi - use the parallel matrix data structures even on one processor
1864                (defaults to using SeqSBAIJ format on one processor)
1865 
1866    Notes:
1867    The number of rows and columns must be divisible by blocksize.
1868 
1869    The user MUST specify either the local or global matrix dimensions
1870    (possibly both).
1871 
1872    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1873    then it must be used on all processors that share the object for that argument.
1874 
1875    If the *_nnz parameter is given then the *_nz parameter is ignored
1876 
1877    Storage Information:
1878    For a square global matrix we define each processor's diagonal portion
1879    to be its local rows and the corresponding columns (a square submatrix);
1880    each processor's off-diagonal portion encompasses the remainder of the
1881    local matrix (a rectangular submatrix).
1882 
1883    The user can specify preallocated storage for the diagonal part of
1884    the local submatrix with either d_nz or d_nnz (not both).  Set
1885    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1886    memory allocation.  Likewise, specify preallocated storage for the
1887    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1888 
1889    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1890    the figure below we depict these three local rows and all columns (0-11).
1891 
1892 .vb
1893            0 1 2 3 4 5 6 7 8 9 10 11
1894           -------------------
1895    row 3  |  o o o d d d o o o o o o
1896    row 4  |  o o o d d d o o o o o o
1897    row 5  |  o o o d d d o o o o o o
1898           -------------------
1899 .ve
1900 
1901    Thus, any entries in the d locations are stored in the d (diagonal)
1902    submatrix, and any entries in the o locations are stored in the
1903    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1904    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1905 
1906    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1907    plus the diagonal part of the d matrix,
1908    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1909    In general, for PDE problems in which most nonzeros are near the diagonal,
1910    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1911    or you will get TERRIBLE performance; see the users' manual chapter on
1912    matrices.
1913 
1914    Level: intermediate
1915 
1916 .keywords: matrix, block, aij, compressed row, sparse, parallel
1917 
1918 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1919 @*/
1920 
1921 PetscErrorCode MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1922 {
1923   PetscErrorCode ierr;
1924   PetscMPIInt    size;
1925 
1926   PetscFunctionBegin;
1927   ierr = MatCreate(comm,m,n,M,N,A);CHKERRQ(ierr);
1928   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1929   if (size > 1) {
1930     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1931     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1932   } else {
1933     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1934     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1935   }
1936   PetscFunctionReturn(0);
1937 }
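
/*
   Illustrative sketch (hypothetical sizes, not part of the original source): the
   convenience routine above combines creation, type selection and preallocation in
   one call; here constant per-row estimates are used instead of d_nnz/o_nnz arrays.
*/
#undef __FUNCT__
#define __FUNCT__ "MatCreateMPISBAIJExample"
static PetscErrorCode MatCreateMPISBAIJExample(MPI_Comm comm,Mat *A)
{
  PetscErrorCode ierr;
  PetscInt       bs = 2,m = 10;  /* local rows; must be divisible by bs */

  PetscFunctionBegin;
  ierr = MatCreateMPISBAIJ(comm,bs,m,m,PETSC_DETERMINE,PETSC_DETERMINE,
                           5,PETSC_NULL,2,PETSC_NULL,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}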
1938 
1939 
1940 #undef __FUNCT__
1941 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1942 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1943 {
1944   Mat            mat;
1945   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1946   PetscErrorCode ierr;
1947   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1948   PetscScalar    *array;
1949 
1950   PetscFunctionBegin;
1951   *newmat       = 0;
1952   ierr = MatCreate(matin->comm,matin->m,matin->n,matin->M,matin->N,&mat);CHKERRQ(ierr);
1953   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1954   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1955 
1956   mat->factor       = matin->factor;
1957   mat->preallocated = PETSC_TRUE;
1958   mat->assembled    = PETSC_TRUE;
1959   mat->insertmode   = NOT_SET_VALUES;
1960 
1961   a = (Mat_MPISBAIJ*)mat->data;
1962   mat->bs  = matin->bs;
1963   a->bs2   = oldmat->bs2;
1964   a->mbs   = oldmat->mbs;
1965   a->nbs   = oldmat->nbs;
1966   a->Mbs   = oldmat->Mbs;
1967   a->Nbs   = oldmat->Nbs;
1968 
1969   a->rstart       = oldmat->rstart;
1970   a->rend         = oldmat->rend;
1971   a->cstart       = oldmat->cstart;
1972   a->cend         = oldmat->cend;
1973   a->size         = oldmat->size;
1974   a->rank         = oldmat->rank;
1975   a->donotstash   = oldmat->donotstash;
1976   a->roworiented  = oldmat->roworiented;
1977   a->rowindices   = 0;
1978   a->rowvalues    = 0;
1979   a->getrowactive = PETSC_FALSE;
1980   a->barray       = 0;
1981   a->rstart_bs    = oldmat->rstart_bs;
1982   a->rend_bs      = oldmat->rend_bs;
1983   a->cstart_bs    = oldmat->cstart_bs;
1984   a->cend_bs      = oldmat->cend_bs;
1985 
1986   /* hash table stuff */
1987   a->ht           = 0;
1988   a->hd           = 0;
1989   a->ht_size      = 0;
1990   a->ht_flag      = oldmat->ht_flag;
1991   a->ht_fact      = oldmat->ht_fact;
1992   a->ht_total_ct  = 0;
1993   a->ht_insert_ct = 0;
1994 
1995   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1996   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1997   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1998   if (oldmat->colmap) {
1999 #if defined (PETSC_USE_CTABLE)
2000     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2001 #else
2002     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2003     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2004     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2005 #endif
2006   } else a->colmap = 0;
2007 
2008   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2009     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2010     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2011     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2012   } else a->garray = 0;
2013 
2014   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2015   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2016   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2017   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2018 
2019   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2020   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2021   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2022   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2023 
2024   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2025   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2026   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2027   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2028   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2029   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2030   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2031   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2032   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2033   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2034   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2035   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2036   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2037 
2038   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2039   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2040   a->sMvctx = oldmat->sMvctx;
2041   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2042 
2043   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2044   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2045   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2046   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2047   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2048   *newmat = mat;
2049   PetscFunctionReturn(0);
2050 }
2051 
2052 #include "petscsys.h"
2053 
2054 #undef __FUNCT__
2055 #define __FUNCT__ "MatLoad_MPISBAIJ"
2056 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer,const MatType type,Mat *newmat)
2057 {
2058   Mat            A;
2059   PetscErrorCode ierr;
2060   PetscInt       i,nz,j,rstart,rend;
2061   PetscScalar    *vals,*buf;
2062   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2063   MPI_Status     status;
2064   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners;
2065   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2066   PetscInt       *locrowlens,*procsnz = 0,jj,*mycols,*ibuf;
2067   PetscInt       bs=1,Mbs,mbs,extra_rows;
2068   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2069   PetscInt       dcount,kmax,k,nzcount,tmp;
2070   int            fd;
2071 
2072   PetscFunctionBegin;
2073   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2074 
2075   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2076   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2077   if (!rank) {
2078     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2079     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2080     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2081     if (header[3] < 0) {
2082       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2083     }
2084   }
2085 
2086   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2087   M = header[1]; N = header[2];
2088 
2089   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2090 
2091   /*
2092      This code adds extra rows to make sure the number of rows is
2093      divisible by the blocksize
2094   */
2095   Mbs        = M/bs;
2096   extra_rows = bs - M + bs*(Mbs);
2097   if (extra_rows == bs) extra_rows = 0;
2098   else                  Mbs++;
2099   if (extra_rows && !rank) {
2100     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2101   }
2102 
2103   /* determine ownership of all rows */
2104   mbs        = Mbs/size + ((Mbs % size) > rank);
2105   m          = mbs*bs;
2106   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2107   browners   = rowners + size + 1;
2108   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2109   rowners[0] = 0;
2110   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2111   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2112   rstart = rowners[rank];
2113   rend   = rowners[rank+1];
2114 
2115   /* distribute row lengths to all processors */
2116   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscInt),&locrowlens);CHKERRQ(ierr);
2117   if (!rank) {
2118     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2119     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2120     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2121     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2122     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2123     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2124     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2125   } else {
2126     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2127   }
2128 
2129   if (!rank) {   /* procs[0] */
2130     /* calculate the number of nonzeros on each processor */
2131     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2132     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2133     for (i=0; i<size; i++) {
2134       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2135         procsnz[i] += rowlengths[j];
2136       }
2137     }
2138     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2139 
2140     /* determine max buffer needed and allocate it */
2141     maxnz = 0;
2142     for (i=0; i<size; i++) {
2143       maxnz = PetscMax(maxnz,procsnz[i]);
2144     }
2145     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2146 
2147     /* read in my part of the matrix column indices  */
2148     nz     = procsnz[0];
2149     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2150     mycols = ibuf;
2151     if (size == 1)  nz -= extra_rows;
2152     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2153     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2154 
2155     /* read in the column indices for all other processors (except the last) and ship them off */
2156     for (i=1; i<size-1; i++) {
2157       nz   = procsnz[i];
2158       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2159       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2160     }
2161     /* read in the stuff for the last proc */
2162     if (size != 1) {
2163       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2164       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2165       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2166       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2167     }
2168     ierr = PetscFree(cols);CHKERRQ(ierr);
2169   } else {  /* procs[i], i>0 */
2170     /* determine buffer space needed for message */
2171     nz = 0;
2172     for (i=0; i<m; i++) {
2173       nz += locrowlens[i];
2174     }
2175     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2176     mycols = ibuf;
2177     /* receive message of column indices*/
2178     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2179     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2180     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2181   }
2182 
2183   /* loop over local rows, determining number of off diagonal entries */
2184   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2185   odlens   = dlens + (rend-rstart);
2186   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2187   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2188   masked1  = mask    + Mbs;
2189   masked2  = masked1 + Mbs;
2190   rowcount = 0; nzcount = 0;
2191   for (i=0; i<mbs; i++) {
2192     dcount  = 0;
2193     odcount = 0;
2194     for (j=0; j<bs; j++) {
2195       kmax = locrowlens[rowcount];
2196       for (k=0; k<kmax; k++) {
2197         tmp = mycols[nzcount++]/bs; /* block col. index */
2198         if (!mask[tmp]) {
2199           mask[tmp] = 1;
2200           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2201           else masked1[dcount++] = tmp; /* entry in diag portion */
2202         }
2203       }
2204       rowcount++;
2205     }
2206 
2207     dlens[i]  = dcount;  /* d_nzz[i] */
2208     odlens[i] = odcount; /* o_nzz[i] */
2209 
2210     /* zero out the mask elements we set */
2211     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2212     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2213   }
2214 
2215   /* create our matrix */
2216   ierr = MatCreate(comm,m,m,PETSC_DETERMINE,PETSC_DETERMINE,&A);CHKERRQ(ierr);
2217   ierr = MatSetType(A,type);CHKERRQ(ierr);
2218   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2219   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2220 
2221   if (!rank) {
2222     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2223     /* read in my part of the matrix numerical values  */
2224     nz = procsnz[0];
2225     vals = buf;
2226     mycols = ibuf;
2227     if (size == 1)  nz -= extra_rows;
2228     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2229     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2230 
2231     /* insert into matrix */
2232     jj      = rstart*bs;
2233     for (i=0; i<m; i++) {
2234       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2235       mycols += locrowlens[i];
2236       vals   += locrowlens[i];
2237       jj++;
2238     }
2239 
2240     /* read in other processors (except the last one) and ship out */
2241     for (i=1; i<size-1; i++) {
2242       nz   = procsnz[i];
2243       vals = buf;
2244       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2245       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2246     }
2247     /* the last proc */
2248     if (size != 1){
2249       nz   = procsnz[size-1] - extra_rows;
2250       vals = buf;
2251       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2252       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2253       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2254     }
2255     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2256 
2257   } else {
2258     /* receive numeric values */
2259     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2260 
2261     /* receive message of values*/
2262     vals   = buf;
2263     mycols = ibuf;
2264     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2265     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2266     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2267 
2268     /* insert into matrix */
2269     jj      = rstart*bs;
2270     for (i=0; i<m; i++) {
2271       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2272       mycols += locrowlens[i];
2273       vals   += locrowlens[i];
2274       jj++;
2275     }
2276   }
2277 
2278   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2279   ierr = PetscFree(buf);CHKERRQ(ierr);
2280   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2281   ierr = PetscFree(rowners);CHKERRQ(ierr);
2282   ierr = PetscFree(dlens);CHKERRQ(ierr);
2283   ierr = PetscFree(mask);CHKERRQ(ierr);
2284   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2285   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2286   *newmat = A;
2287   PetscFunctionReturn(0);
2288 }
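
/*
   Illustrative sketch (hypothetical helper, not part of the original source): loading
   an MPISBAIJ matrix from an already opened binary viewer.  The block size is taken
   from the -matload_block_size option (default 1), as read at the top of
   MatLoad_MPISBAIJ() above.
*/
#undef __FUNCT__
#define __FUNCT__ "MatLoadMPISBAIJExample"
static PetscErrorCode MatLoadMPISBAIJExample(PetscViewer viewer,Mat *A)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  /* run with e.g. -matload_block_size 2 to load with 2x2 blocks */
  ierr = MatLoad(viewer,MATMPISBAIJ,A);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}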
2289 
2290 #undef __FUNCT__
2291 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2292 /*XXXXX@
2293    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2294 
2295    Input Parameters:
2296 .  mat  - the matrix
2297 .  fact - factor
2298 
2299    Collective on Mat
2300 
2301    Level: advanced
2302 
2303   Notes:
2304    This can also be set by the command line option: -mat_use_hash_table <fact>
2305 
2306 .keywords: matrix, hashtable, factor, HT
2307 
2308 .seealso: MatSetOption()
2309 @XXXXX*/
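
/*
   Illustrative sketch (hypothetical helper and factor value, not part of the original
   source): in this revision the factor is routed through the BAIJ routine, exactly as
   MatCreate_MPISBAIJ() does above; it can also be set on the command line with
   -mat_use_hash_table <fact>.  The option-processing code above falls back to 1.39
   for factors <= 1.0.
*/
#undef __FUNCT__
#define __FUNCT__ "MatSBAIJHashTableExample"
static PetscErrorCode MatSBAIJHashTableExample(Mat B)
{
  PetscErrorCode ierr;

  PetscFunctionBegin;
  ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
  ierr = MatMPIBAIJSetHashTableFactor(B,1.6);CHKERRQ(ierr);
  PetscFunctionReturn(0);
}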
2310 
2311 
2312 #undef __FUNCT__
2313 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2314 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2315 {
2316   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2317   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2318   PetscReal      atmp;
2319   PetscReal      *work,*svalues,*rvalues;
2320   PetscErrorCode ierr;
2321   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2322   PetscMPIInt    rank,size;
2323   PetscInt       *rowners_bs,dest,count,source;
2324   PetscScalar    *va;
2325   MatScalar      *ba;
2326   MPI_Status     stat;
2327 
2328   PetscFunctionBegin;
2329   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2330   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2331 
2332   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2333   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2334 
2335   bs   = A->bs;
2336   mbs  = a->mbs;
2337   Mbs  = a->Mbs;
2338   ba   = b->a;
2339   bi   = b->i;
2340   bj   = b->j;
2341 
2342   /* find ownerships */
2343   rowners_bs = a->rowners_bs;
2344 
2345   /* each proc creates an array to be distributed */
2346   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2347   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2348 
2349   /* row_max for B */
2350   if (rank != size-1){
2351     for (i=0; i<mbs; i++) {
2352       ncols = bi[1] - bi[0]; bi++;
2353       brow  = bs*i;
2354       for (j=0; j<ncols; j++){
2355         bcol = bs*(*bj);
2356         for (kcol=0; kcol<bs; kcol++){
2357           col = bcol + kcol;                 /* local col index */
2358           col += rowners_bs[rank+1];      /* global col index */
2359           for (krow=0; krow<bs; krow++){
2360             atmp = PetscAbsScalar(*ba); ba++;
2361             row = brow + krow;    /* local row index */
2362             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2363             if (work[col] < atmp) work[col] = atmp;
2364           }
2365         }
2366         bj++;
2367       }
2368     }
2369 
2370     /* send values to its owners */
2371     for (dest=rank+1; dest<size; dest++){
2372       svalues = work + rowners_bs[dest];
2373       count   = rowners_bs[dest+1]-rowners_bs[dest];
2374       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2375     }
2376   }
2377 
2378   /* receive values */
2379   if (rank){
2380     rvalues = work;
2381     count   = rowners_bs[rank+1]-rowners_bs[rank];
2382     for (source=0; source<rank; source++){
2383       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2384       /* process values */
2385       for (i=0; i<count; i++){
2386         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2387       }
2388     }
2389   }
2390 
2391   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2392   ierr = PetscFree(work);CHKERRQ(ierr);
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatRelax_MPISBAIJ"
2398 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2399 {
2400   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2401   PetscErrorCode ierr;
2402   PetscInt       mbs=mat->mbs,bs=matin->bs;
2403   PetscScalar    mone=-1.0,*x,*b,*ptr,zero=0.0;
2404   Vec            bb1;
2405 
2406   PetscFunctionBegin;
2407   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2408   if (bs > 1)
2409     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2410 
2411   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2412     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2413       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2414       its--;
2415     }
2416 
2417     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2418     while (its--){
2419 
2420       /* lower triangular part: slvec0b = - B^T*xx */
2421       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2422 
2423       /* copy xx into slvec0a */
2424       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2425       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2426       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2427       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2428 
2429       ierr = VecScale(&mone,mat->slvec0);CHKERRQ(ierr);
2430 
2431       /* copy bb into slvec1a */
2432       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2433       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2434       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2435       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2436 
2437       /* set slvec1b = 0 */
2438       ierr = VecSet(&zero,mat->slvec1b);CHKERRQ(ierr);
2439 
2440       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2441       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2442       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2443       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2444 
2445       /* upper triangular part: bb1 = bb1 - B*x */
2446       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2447 
2448       /* local diagonal sweep */
2449       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2450     }
2451     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2452   } else {
2453     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2454   }
2455   PetscFunctionReturn(0);
2456 }
2457 
2458 #undef __FUNCT__
2459 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2460 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2461 {
2462   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2463   PetscErrorCode ierr;
2464   PetscScalar    mone=-1.0;
2465   Vec            lvec1,bb1;
2466 
2467   PetscFunctionBegin;
2468   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2469   if (matin->bs > 1)
2470     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2471 
2472   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2473     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2474       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2475       its--;
2476     }
2477 
2478     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2479     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2480     while (its--){
2481       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2482 
2483       /* lower triangular part: bb1 = bb - B^T*xx */
2484       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2485       ierr = VecScale(&mone,lvec1);CHKERRQ(ierr);
2486 
2487       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2488       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2489       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2490 
2491       /* upper triangular part: bb1 = bb1 - B*x */
2492       ierr = VecScale(&mone,mat->lvec);CHKERRQ(ierr);
2493       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2494 
2495       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2496 
2497       /* diagonal sweep */
2498       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2499     }
2500     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2501     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2502   } else {
2503     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2504   }
2505   PetscFunctionReturn(0);
2506 }
2507 
2508