xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision d0d4cfc252e3fccbd64022956ccaf75a1499cd49)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by user is ingored.
207 */
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if(!baij->donotstash){
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if(!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If NumCol = 1 then a copy is not required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data --;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463  PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processors are either in INSERTMODE or ADDMODE */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since the values are stashed column-oriented,
608        set the roworiented flag to column oriented, and after MatSetValues()
609        restore the original flags */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any processor has disassembled, if so we must
639      also disassemble ourselfs, in order that we may reassemble. */
640   /*
641      if nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled thus we can skip this stuff
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
701       PetscFunctionReturn(0);
702     }
703   }
704 
705   if (isdraw) {
706     PetscDraw       draw;
707     PetscTruth isnull;
708     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
709     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
710   }
711 
712   if (size == 1) {
713     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
714     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
715   } else {
716     /* assemble the entire matrix onto first processor. */
717     Mat         A;
718     Mat_SeqSBAIJ *Aloc;
719     Mat_SeqBAIJ *Bloc;
720     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
721     MatScalar   *a;
722 
723     /* Should this be the same type as mat? */
724     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
727     } else {
728       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
729     }
730     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
731     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
732     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
733 
734     /* copy over the A part */
735     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
736     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
737     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
738 
739     for (i=0; i<mbs; i++) {
740       rvals[0] = bs*(baij->rstart + i);
741       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
742       for (j=ai[i]; j<ai[i+1]; j++) {
743         col = (baij->cstart+aj[j])*bs;
744         for (k=0; k<bs; k++) {
745           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
746           col++; a += bs;
747         }
748       }
749     }
750     /* copy over the B part */
751     Bloc = (Mat_SeqBAIJ*)baij->B->data;
752     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
753     for (i=0; i<mbs; i++) {
754       rvals[0] = bs*(baij->rstart + i);
755       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
756       for (j=ai[i]; j<ai[i+1]; j++) {
757         col = baij->garray[aj[j]]*bs;
758         for (k=0; k<bs; k++) {
759           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
760           col++; a += bs;
761         }
762       }
763     }
764     ierr = PetscFree(rvals);CHKERRQ(ierr);
765     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
766     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     /*
768        Everyone has to call to draw the matrix since the graphics waits are
769        synchronized across all processors that share the PetscDraw object
770     */
771     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
772     if (!rank) {
773       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
774       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
775     }
776     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
777     ierr = MatDestroy(A);CHKERRQ(ierr);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatView_MPISBAIJ"
784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
785 {
786   PetscErrorCode ierr;
787   PetscTruth     iascii,isdraw,issocket,isbinary;
788 
789   PetscFunctionBegin;
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
791   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
792   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
793   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
794   if (iascii || isdraw || issocket || isbinary) {
795     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
796   } else {
797     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatDestroy_MPISBAIJ"
804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
805 {
806   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
807   PetscErrorCode ierr;
808 
809   PetscFunctionBegin;
810 #if defined(PETSC_USE_LOG)
811   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
812 #endif
813   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
814   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
815   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
816   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
817   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
818 #if defined (PETSC_USE_CTABLE)
819   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
820 #else
821   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
822 #endif
823   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
824   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
825   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
826   if (baij->slvec0) {
827     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
828     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
829   }
830   if (baij->slvec1) {
831     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
832     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
834   }
835   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
836   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
837   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
838   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
839 #if defined(PETSC_USE_MAT_SINGLE)
840   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
841 #endif
842   ierr = PetscFree(baij);CHKERRQ(ierr);
843 
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
846   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
847   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMult_MPISBAIJ"
853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
854 {
855   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
856   PetscErrorCode ierr;
857   PetscInt       nt,mbs=a->mbs,bs=A->bs;
858   PetscScalar    *x,*from,zero=0.0;
859 
860   PetscFunctionBegin;
861   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
862   if (nt != A->n) {
863     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
864   }
865   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
866   if (nt != A->m) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
868   }
869 
870   /* diagonal part */
871   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
872   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
873 
874   /* subdiagonal part */
875   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
876 
877   /* copy x into the vec slvec0 */
878   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
879   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
880   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
881   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
882 
883   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
884   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
885   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
886 
887   /* supperdiagonal part */
888   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
896 {
897   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       nt;
900 
901   PetscFunctionBegin;
902   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
903   if (nt != A->n) {
904     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
905   }
906   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
907   if (nt != A->m) {
908     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
909   }
910 
911   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
912   /* do diagonal part */
913   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
914   /* do supperdiagonal part */
915   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
916   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
917   /* do subdiagonal part */
918   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
919   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
921 
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
928 {
929   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
930   PetscErrorCode ierr;
931   PetscInt       mbs=a->mbs,bs=A->bs;
932   PetscScalar    *x,*from,zero=0.0;
933 
934   PetscFunctionBegin;
935   /*
936   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
937   PetscSynchronizedFlush(A->comm);
938   */
939   /* diagonal part */
940   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
941   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
942 
943   /* subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
945 
946   /* copy x into the vec slvec0 */
947   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
950   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
951 
952   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
955 
956   /* supperdiagonal part */
957   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
958 
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
971   /* do diagonal part */
972   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
973   /* do supperdiagonal part */
974   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976 
977   /* do subdiagonal part */
978   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
979   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
980   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 /*
986   This only works correctly for square matrices where the subblock A->A is the
987    diagonal block
988 */
989 #undef __FUNCT__
990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
992 {
993   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
998   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatScale_MPISBAIJ"
1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1005 {
1006   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1011   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1018 {
1019   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1020   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1021   PetscErrorCode ierr;
1022   PetscInt       bs = matin->bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1023   PetscInt       nztot,nzA,nzB,lrow,brstart = mat->rstart*bs,brend = mat->rend*bs;
1024   PetscInt       *cmap,*idx_p,cstart = mat->cstart;
1025 
1026   PetscFunctionBegin;
1027   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1028   mat->getrowactive = PETSC_TRUE;
1029 
1030   if (!mat->rowvalues && (idx || v)) {
1031     /*
1032         allocate enough space to hold information from the longest row.
1033     */
1034     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1035     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1036     PetscInt     max = 1,mbs = mat->mbs,tmp;
1037     for (i=0; i<mbs; i++) {
1038       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1039       if (max < tmp) { max = tmp; }
1040     }
1041     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1042     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1043   }
1044 
1045   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1046   lrow = row - brstart;  /* local row index */
1047 
1048   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1049   if (!v)   {pvA = 0; pvB = 0;}
1050   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1051   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1052   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1053   nztot = nzA + nzB;
1054 
1055   cmap  = mat->garray;
1056   if (v  || idx) {
1057     if (nztot) {
1058       /* Sort by increasing column numbers, assuming A and B already sorted */
1059       PetscInt imark = -1;
1060       if (v) {
1061         *v = v_p = mat->rowvalues;
1062         for (i=0; i<nzB; i++) {
1063           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1064           else break;
1065         }
1066         imark = i;
1067         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1068         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1069       }
1070       if (idx) {
1071         *idx = idx_p = mat->rowindices;
1072         if (imark > -1) {
1073           for (i=0; i<imark; i++) {
1074             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1075           }
1076         } else {
1077           for (i=0; i<nzB; i++) {
1078             if (cmap[cworkB[i]/bs] < cstart)
1079               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1080             else break;
1081           }
1082           imark = i;
1083         }
1084         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1085         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1086       }
1087     } else {
1088       if (idx) *idx = 0;
1089       if (v)   *v   = 0;
1090     }
1091   }
1092   *nz = nztot;
1093   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1094   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1095   PetscFunctionReturn(0);
1096 }
1097 
1098 #undef __FUNCT__
1099 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1100 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1101 {
1102   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1103 
1104   PetscFunctionBegin;
1105   if (!baij->getrowactive) {
1106     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1107   }
1108   baij->getrowactive = PETSC_FALSE;
1109   PetscFunctionReturn(0);
1110 }
1111 
1112 #undef __FUNCT__
1113 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1114 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1115 {
1116   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1117   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1118 
1119   PetscFunctionBegin;
1120   aA->getrow_utriangular = PETSC_TRUE;
1121   PetscFunctionReturn(0);
1122 }
1123 #undef __FUNCT__
1124 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1125 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1126 {
1127   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1128   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1129 
1130   PetscFunctionBegin;
1131   aA->getrow_utriangular = PETSC_FALSE;
1132   PetscFunctionReturn(0);
1133 }
1134 
1135 #undef __FUNCT__
1136 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1137 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1138 {
1139   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1140   PetscErrorCode ierr;
1141 
1142   PetscFunctionBegin;
1143   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1144   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1145   PetscFunctionReturn(0);
1146 }
1147 
1148 #undef __FUNCT__
1149 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1150 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1151 {
1152   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1153   PetscErrorCode ierr;
1154 
1155   PetscFunctionBegin;
1156   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1157   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1158   PetscFunctionReturn(0);
1159 }
1160 
1161 #undef __FUNCT__
1162 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1163 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1164 {
1165   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1166   PetscErrorCode ierr;
1167 
1168   PetscFunctionBegin;
1169   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1170   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1171   PetscFunctionReturn(0);
1172 }
1173 
1174 #undef __FUNCT__
1175 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1176 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1177 {
1178   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1179   Mat            A = a->A,B = a->B;
1180   PetscErrorCode ierr;
1181   PetscReal      isend[5],irecv[5];
1182 
1183   PetscFunctionBegin;
1184   info->block_size     = (PetscReal)matin->bs;
1185   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1186   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1187   isend[3] = info->memory;  isend[4] = info->mallocs;
1188   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1189   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1190   isend[3] += info->memory;  isend[4] += info->mallocs;
1191   if (flag == MAT_LOCAL) {
1192     info->nz_used      = isend[0];
1193     info->nz_allocated = isend[1];
1194     info->nz_unneeded  = isend[2];
1195     info->memory       = isend[3];
1196     info->mallocs      = isend[4];
1197   } else if (flag == MAT_GLOBAL_MAX) {
1198     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1199     info->nz_used      = irecv[0];
1200     info->nz_allocated = irecv[1];
1201     info->nz_unneeded  = irecv[2];
1202     info->memory       = irecv[3];
1203     info->mallocs      = irecv[4];
1204   } else if (flag == MAT_GLOBAL_SUM) {
1205     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1206     info->nz_used      = irecv[0];
1207     info->nz_allocated = irecv[1];
1208     info->nz_unneeded  = irecv[2];
1209     info->memory       = irecv[3];
1210     info->mallocs      = irecv[4];
1211   } else {
1212     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1213   }
1214   info->rows_global       = (PetscReal)A->M;
1215   info->columns_global    = (PetscReal)A->N;
1216   info->rows_local        = (PetscReal)A->m;
1217   info->columns_local     = (PetscReal)A->N;
1218   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1219   info->fill_ratio_needed = 0;
1220   info->factor_mallocs    = 0;
1221   PetscFunctionReturn(0);
1222 }
1223 
1224 #undef __FUNCT__
1225 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1226 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1227 {
1228   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1229   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1230   PetscErrorCode ierr;
1231 
1232   PetscFunctionBegin;
1233   switch (op) {
1234   case MAT_NO_NEW_NONZERO_LOCATIONS:
1235   case MAT_YES_NEW_NONZERO_LOCATIONS:
1236   case MAT_COLUMNS_UNSORTED:
1237   case MAT_COLUMNS_SORTED:
1238   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1239   case MAT_KEEP_ZEROED_ROWS:
1240   case MAT_NEW_NONZERO_LOCATION_ERR:
1241     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1242     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1243     break;
1244   case MAT_ROW_ORIENTED:
1245     a->roworiented = PETSC_TRUE;
1246     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1247     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1248     break;
1249   case MAT_ROWS_SORTED:
1250   case MAT_ROWS_UNSORTED:
1251   case MAT_YES_NEW_DIAGONALS:
1252     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1253     break;
1254   case MAT_COLUMN_ORIENTED:
1255     a->roworiented = PETSC_FALSE;
1256     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1257     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1258     break;
1259   case MAT_IGNORE_OFF_PROC_ENTRIES:
1260     a->donotstash = PETSC_TRUE;
1261     break;
1262   case MAT_NO_NEW_DIAGONALS:
1263     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1264   case MAT_USE_HASH_TABLE:
1265     a->ht_flag = PETSC_TRUE;
1266     break;
1267   case MAT_NOT_SYMMETRIC:
1268   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1269   case MAT_HERMITIAN:
1270     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1271   case MAT_SYMMETRIC:
1272   case MAT_STRUCTURALLY_SYMMETRIC:
1273   case MAT_NOT_HERMITIAN:
1274   case MAT_SYMMETRY_ETERNAL:
1275   case MAT_NOT_SYMMETRY_ETERNAL:
1276     break;
1277   case MAT_IGNORE_LOWER_TRIANGULAR:
1278     aA->ignore_ltriangular = PETSC_TRUE;
1279     break;
1280   case MAT_ERROR_LOWER_TRIANGULAR:
1281     aA->ignore_ltriangular = PETSC_FALSE;
1282     break;
1283   case MAT_GETROW_UPPERTRIANGULAR:
1284     aA->getrow_utriangular = PETSC_TRUE;
1285     break;
1286   default:
1287     SETERRQ(PETSC_ERR_SUP,"unknown option");
1288   }
1289   PetscFunctionReturn(0);
1290 }
1291 
1292 #undef __FUNCT__
1293 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1294 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1295 {
1296   PetscErrorCode ierr;
1297   PetscFunctionBegin;
1298   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1304 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1305 {
1306   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1307   Mat            a=baij->A, b=baij->B;
1308   PetscErrorCode ierr;
1309   PetscInt       nv,m,n;
1310   PetscTruth     flg;
1311 
1312   PetscFunctionBegin;
1313   if (ll != rr){
1314     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1315     if (!flg)
1316       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1317   }
1318   if (!ll) PetscFunctionReturn(0);
1319 
1320   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1321   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1322 
1323   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1324   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1325 
1326   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1327 
1328   /* left diagonalscale the off-diagonal part */
1329   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1330 
1331   /* scale the diagonal part */
1332   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1333 
1334   /* right diagonalscale the off-diagonal part */
1335   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1336   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1337   PetscFunctionReturn(0);
1338 }
1339 
1340 #undef __FUNCT__
1341 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1342 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1343 {
1344   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1345   MPI_Comm          comm = A->comm;
1346   static PetscTruth called = PETSC_FALSE;
1347   PetscErrorCode    ierr;
1348 
1349   PetscFunctionBegin;
1350   if (!a->rank) {
1351     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1352   }
1353   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1354   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1355   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1356   PetscFunctionReturn(0);
1357 }
1358 
1359 #undef __FUNCT__
1360 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1361 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1362 {
1363   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1364   PetscErrorCode ierr;
1365 
1366   PetscFunctionBegin;
1367   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1368   PetscFunctionReturn(0);
1369 }
1370 
1371 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1372 
1373 #undef __FUNCT__
1374 #define __FUNCT__ "MatEqual_MPISBAIJ"
1375 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1376 {
1377   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1378   Mat            a,b,c,d;
1379   PetscTruth     flg;
1380   PetscErrorCode ierr;
1381 
1382   PetscFunctionBegin;
1383   a = matA->A; b = matA->B;
1384   c = matB->A; d = matB->B;
1385 
1386   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1387   if (flg) {
1388     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1389   }
1390   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1391   PetscFunctionReturn(0);
1392 }
1393 
1394 #undef __FUNCT__
1395 #define __FUNCT__ "MatCopy_MPISBAIJ"
1396 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1397 {
1398   PetscErrorCode ierr;
1399   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1400   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1401 
1402   PetscFunctionBegin;
1403   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1404   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1405     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1406     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1407     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1408   } else {
1409     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1410     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1411   }
1412   PetscFunctionReturn(0);
1413 }
1414 
1415 #undef __FUNCT__
1416 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1417 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1418 {
1419   PetscErrorCode ierr;
1420 
1421   PetscFunctionBegin;
1422   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1423   PetscFunctionReturn(0);
1424 }
1425 
1426 #include "petscblaslapack.h"
1427 #undef __FUNCT__
1428 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1429 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1430 {
1431   PetscErrorCode ierr;
1432   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1433   PetscBLASInt   bnz,one=1;
1434   Mat_SeqSBAIJ   *xa,*ya;
1435   Mat_SeqBAIJ    *xb,*yb;
1436 
1437   PetscFunctionBegin;
1438   if (str == SAME_NONZERO_PATTERN) {
1439     PetscScalar alpha = a;
1440     xa = (Mat_SeqSBAIJ *)xx->A->data;
1441     ya = (Mat_SeqSBAIJ *)yy->A->data;
1442     bnz = (PetscBLASInt)xa->nz;
1443     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1444     xb = (Mat_SeqBAIJ *)xx->B->data;
1445     yb = (Mat_SeqBAIJ *)yy->B->data;
1446     bnz = (PetscBLASInt)xb->nz;
1447     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1448   } else {
1449     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1450     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1451     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1452   }
1453   PetscFunctionReturn(0);
1454 }
1455 
1456 #undef __FUNCT__
1457 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1458 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1459 {
1460   PetscErrorCode ierr;
1461   PetscInt       i;
1462   PetscTruth     flg;
1463 
1464   PetscFunctionBegin;
1465   for (i=0; i<n; i++) {
1466     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1467     if (!flg) {
1468       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1469     }
1470   }
1471   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1472   PetscFunctionReturn(0);
1473 }
1474 
1475 
1476 /* -------------------------------------------------------------------*/
1477 static struct _MatOps MatOps_Values = {
1478        MatSetValues_MPISBAIJ,
1479        MatGetRow_MPISBAIJ,
1480        MatRestoreRow_MPISBAIJ,
1481        MatMult_MPISBAIJ,
1482 /* 4*/ MatMultAdd_MPISBAIJ,
1483        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1484        MatMultAdd_MPISBAIJ,
1485        0,
1486        0,
1487        0,
1488 /*10*/ 0,
1489        0,
1490        0,
1491        MatRelax_MPISBAIJ,
1492        MatTranspose_MPISBAIJ,
1493 /*15*/ MatGetInfo_MPISBAIJ,
1494        MatEqual_MPISBAIJ,
1495        MatGetDiagonal_MPISBAIJ,
1496        MatDiagonalScale_MPISBAIJ,
1497        MatNorm_MPISBAIJ,
1498 /*20*/ MatAssemblyBegin_MPISBAIJ,
1499        MatAssemblyEnd_MPISBAIJ,
1500        0,
1501        MatSetOption_MPISBAIJ,
1502        MatZeroEntries_MPISBAIJ,
1503 /*25*/ 0,
1504        0,
1505        0,
1506        0,
1507        0,
1508 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1509        0,
1510        0,
1511        0,
1512        0,
1513 /*35*/ MatDuplicate_MPISBAIJ,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*40*/ MatAXPY_MPISBAIJ,
1519        MatGetSubMatrices_MPISBAIJ,
1520        MatIncreaseOverlap_MPISBAIJ,
1521        MatGetValues_MPISBAIJ,
1522        MatCopy_MPISBAIJ,
1523 /*45*/ MatPrintHelp_MPISBAIJ,
1524        MatScale_MPISBAIJ,
1525        0,
1526        0,
1527        0,
1528 /*50*/ 0,
1529        0,
1530        0,
1531        0,
1532        0,
1533 /*55*/ 0,
1534        0,
1535        MatSetUnfactored_MPISBAIJ,
1536        0,
1537        MatSetValuesBlocked_MPISBAIJ,
1538 /*60*/ 0,
1539        0,
1540        0,
1541        MatGetPetscMaps_Petsc,
1542        0,
1543 /*65*/ 0,
1544        0,
1545        0,
1546        0,
1547        0,
1548 /*70*/ MatGetRowMax_MPISBAIJ,
1549        0,
1550        0,
1551        0,
1552        0,
1553 /*75*/ 0,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*80*/ 0,
1559        0,
1560        0,
1561        0,
1562        MatLoad_MPISBAIJ,
1563 /*85*/ 0,
1564        0,
1565        0,
1566        0,
1567        0,
1568 /*90*/ 0,
1569        0,
1570        0,
1571        0,
1572        0,
1573 /*95*/ 0,
1574        0,
1575        0,
1576        0,
1577        0,
1578 /*100*/0,
1579        0,
1580        0,
1581        0,
1582        0,
1583 /*105*/0,
1584        MatRealPart_MPISBAIJ,
1585        MatImaginaryPart_MPISBAIJ,
1586        MatGetRowUpperTriangular_MPISBAIJ,
1587        MatRestoreRowUpperTriangular_MPISBAIJ
1588 };
1589 
1590 
1591 EXTERN_C_BEGIN
1592 #undef __FUNCT__
1593 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1594 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1595 {
1596   PetscFunctionBegin;
1597   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1598   *iscopy = PETSC_FALSE;
1599   PetscFunctionReturn(0);
1600 }
1601 EXTERN_C_END
1602 
1603 EXTERN_C_BEGIN
1604 #undef __FUNCT__
1605 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1606 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1607 {
1608   Mat_MPISBAIJ   *b;
1609   PetscErrorCode ierr;
1610   PetscInt       i,mbs,Mbs;
1611 
1612   PetscFunctionBegin;
1613   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1614 
1615   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1616   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1617   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1618   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1619   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1620   if (d_nnz) {
1621     for (i=0; i<B->m/bs; i++) {
1622       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1623     }
1624   }
1625   if (o_nnz) {
1626     for (i=0; i<B->m/bs; i++) {
1627       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1628     }
1629   }
1630   B->preallocated = PETSC_TRUE;
1631   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1632   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1633   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1634   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1635 
1636   b   = (Mat_MPISBAIJ*)B->data;
1637   mbs = B->m/bs;
1638   Mbs = B->M/bs;
1639   if (mbs*bs != B->m) {
1640     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1641   }
1642 
1643   B->bs  = bs;
1644   b->bs2 = bs*bs;
1645   b->mbs = mbs;
1646   b->nbs = mbs;
1647   b->Mbs = Mbs;
1648   b->Nbs = Mbs;
1649 
1650   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1651   b->rowners[0]    = 0;
1652   for (i=2; i<=b->size; i++) {
1653     b->rowners[i] += b->rowners[i-1];
1654   }
1655   b->rstart    = b->rowners[b->rank];
1656   b->rend      = b->rowners[b->rank+1];
1657   b->cstart    = b->rstart;
1658   b->cend      = b->rend;
1659   for (i=0; i<=b->size; i++) {
1660     b->rowners_bs[i] = b->rowners[i]*bs;
1661   }
1662   b->rstart_bs = b-> rstart*bs;
1663   b->rend_bs   = b->rend*bs;
1664 
1665   b->cstart_bs = b->cstart*bs;
1666   b->cend_bs   = b->cend*bs;
1667 
1668   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1669   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1670   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1671   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1672   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1673 
1674   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1675   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1676   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1677   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1678   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1679 
1680   /* build cache for off array entries formed */
1681   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1682 
1683   PetscFunctionReturn(0);
1684 }
1685 EXTERN_C_END
1686 
1687 /*MC
1688    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1689    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1690 
1691    Options Database Keys:
1692 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1693 
1694   Level: beginner
1695 
1696 .seealso: MatCreateMPISBAIJ
1697 M*/
1698 
1699 EXTERN_C_BEGIN
1700 #undef __FUNCT__
1701 #define __FUNCT__ "MatCreate_MPISBAIJ"
1702 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1703 {
1704   Mat_MPISBAIJ   *b;
1705   PetscErrorCode ierr;
1706   PetscTruth     flg;
1707 
1708   PetscFunctionBegin;
1709 
1710   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1711   B->data = (void*)b;
1712   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1713 
1714   B->ops->destroy    = MatDestroy_MPISBAIJ;
1715   B->ops->view       = MatView_MPISBAIJ;
1716   B->mapping    = 0;
1717   B->factor     = 0;
1718   B->assembled  = PETSC_FALSE;
1719 
1720   B->insertmode = NOT_SET_VALUES;
1721   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1722   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1723 
1724   /* build local table of row and column ownerships */
1725   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1726   b->cowners    = b->rowners + b->size + 2;
1727   b->rowners_bs = b->cowners + b->size + 2;
1728   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1729 
1730   /* build cache for off array entries formed */
1731   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1732   b->donotstash  = PETSC_FALSE;
1733   b->colmap      = PETSC_NULL;
1734   b->garray      = PETSC_NULL;
1735   b->roworiented = PETSC_TRUE;
1736 
1737 #if defined(PETSC_USE_MAT_SINGLE)
1738   /* stuff for MatSetValues_XXX in single precision */
1739   b->setvalueslen     = 0;
1740   b->setvaluescopy    = PETSC_NULL;
1741 #endif
1742 
1743   /* stuff used in block assembly */
1744   b->barray       = 0;
1745 
1746   /* stuff used for matrix vector multiply */
1747   b->lvec         = 0;
1748   b->Mvctx        = 0;
1749   b->slvec0       = 0;
1750   b->slvec0b      = 0;
1751   b->slvec1       = 0;
1752   b->slvec1a      = 0;
1753   b->slvec1b      = 0;
1754   b->sMvctx       = 0;
1755 
1756   /* stuff for MatGetRow() */
1757   b->rowindices   = 0;
1758   b->rowvalues    = 0;
1759   b->getrowactive = PETSC_FALSE;
1760 
1761   /* hash table stuff */
1762   b->ht           = 0;
1763   b->hd           = 0;
1764   b->ht_size      = 0;
1765   b->ht_flag      = PETSC_FALSE;
1766   b->ht_fact      = 0;
1767   b->ht_total_ct  = 0;
1768   b->ht_insert_ct = 0;
1769 
1770   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1771   if (flg) {
1772     PetscReal fact = 1.39;
1773     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1774     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1775     if (fact <= 1.0) fact = 1.39;
1776     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1777     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1778   }
1779   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1780                                      "MatStoreValues_MPISBAIJ",
1781                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1782   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1783                                      "MatRetrieveValues_MPISBAIJ",
1784                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1785   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1786                                      "MatGetDiagonalBlock_MPISBAIJ",
1787                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1788   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1789                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1790                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1791   B->symmetric                  = PETSC_TRUE;
1792   B->structurally_symmetric     = PETSC_TRUE;
1793   B->symmetric_set              = PETSC_TRUE;
1794   B->structurally_symmetric_set = PETSC_TRUE;
1795   PetscFunctionReturn(0);
1796 }
1797 EXTERN_C_END
1798 
1799 /*MC
1800    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1801 
1802    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1803    and MATMPISBAIJ otherwise.
1804 
1805    Options Database Keys:
1806 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1807 
1808   Level: beginner
1809 
1810 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1811 M*/
1812 
1813 EXTERN_C_BEGIN
1814 #undef __FUNCT__
1815 #define __FUNCT__ "MatCreate_SBAIJ"
1816 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1817 {
1818   PetscErrorCode ierr;
1819   PetscMPIInt    size;
1820 
1821   PetscFunctionBegin;
1822   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1823   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1824   if (size == 1) {
1825     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1826   } else {
1827     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1828   }
1829   PetscFunctionReturn(0);
1830 }
1831 EXTERN_C_END
1832 
1833 #undef __FUNCT__
1834 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1835 /*@C
1836    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1837    the user should preallocate the matrix storage by setting the parameters
1838    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1839    performance can be increased by more than a factor of 50.
1840 
1841    Collective on Mat
1842 
1843    Input Parameters:
1844 +  A - the matrix
1845 .  bs   - size of blockk
1846 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1847            submatrix  (same for all local rows)
1848 .  d_nnz - array containing the number of block nonzeros in the various block rows
1849            in the upper triangular and diagonal part of the in diagonal portion of the local
1850            (possibly different for each block row) or PETSC_NULL.  You must leave room
1851            for the diagonal entry even if it is zero.
1852 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1853            submatrix (same for all local rows).
1854 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1855            off-diagonal portion of the local submatrix (possibly different for
1856            each block row) or PETSC_NULL.
1857 
1858 
1859    Options Database Keys:
1860 .   -mat_no_unroll - uses code that does not unroll the loops in the
1861                      block calculations (much slower)
1862 .   -mat_block_size - size of the blocks to use
1863 
1864    Notes:
1865 
1866    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1867    than it must be used on all processors that share the object for that argument.
1868 
1869    If the *_nnz parameter is given then the *_nz parameter is ignored
1870 
1871    Storage Information:
1872    For a square global matrix we define each processor's diagonal portion
1873    to be its local rows and the corresponding columns (a square submatrix);
1874    each processor's off-diagonal portion encompasses the remainder of the
1875    local matrix (a rectangular submatrix).
1876 
1877    The user can specify preallocated storage for the diagonal part of
1878    the local submatrix with either d_nz or d_nnz (not both).  Set
1879    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1880    memory allocation.  Likewise, specify preallocated storage for the
1881    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1882 
1883    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1884    the figure below we depict these three local rows and all columns (0-11).
1885 
1886 .vb
1887            0 1 2 3 4 5 6 7 8 9 10 11
1888           -------------------
1889    row 3  |  o o o d d d o o o o o o
1890    row 4  |  o o o d d d o o o o o o
1891    row 5  |  o o o d d d o o o o o o
1892           -------------------
1893 .ve
1894 
1895    Thus, any entries in the d locations are stored in the d (diagonal)
1896    submatrix, and any entries in the o locations are stored in the
1897    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1898    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1899 
1900    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1901    plus the diagonal part of the d matrix,
1902    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1903    In general, for PDE problems in which most nonzeros are near the diagonal,
1904    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1905    or you will get TERRIBLE performance; see the users' manual chapter on
1906    matrices.
1907 
1908    Level: intermediate
1909 
1910 .keywords: matrix, block, aij, compressed row, sparse, parallel
1911 
1912 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1913 @*/
1914 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1915 {
1916   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1917 
1918   PetscFunctionBegin;
1919   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1920   if (f) {
1921     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1922   }
1923   PetscFunctionReturn(0);
1924 }
1925 
1926 #undef __FUNCT__
1927 #define __FUNCT__ "MatCreateMPISBAIJ"
1928 /*@C
1929    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1930    (block compressed row).  For good matrix assembly performance
1931    the user should preallocate the matrix storage by setting the parameters
1932    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1933    performance can be increased by more than a factor of 50.
1934 
1935    Collective on MPI_Comm
1936 
1937    Input Parameters:
1938 +  comm - MPI communicator
1939 .  bs   - size of blockk
1940 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1941            This value should be the same as the local size used in creating the
1942            y vector for the matrix-vector product y = Ax.
1943 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1944            This value should be the same as the local size used in creating the
1945            x vector for the matrix-vector product y = Ax.
1946 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1947 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1948 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1949            submatrix  (same for all local rows)
1950 .  d_nnz - array containing the number of block nonzeros in the various block rows
1951            in the upper triangular portion of the in diagonal portion of the local
1952            (possibly different for each block block row) or PETSC_NULL.
1953            You must leave room for the diagonal entry even if it is zero.
1954 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1955            submatrix (same for all local rows).
1956 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1957            off-diagonal portion of the local submatrix (possibly different for
1958            each block row) or PETSC_NULL.
1959 
1960    Output Parameter:
1961 .  A - the matrix
1962 
1963    Options Database Keys:
1964 .   -mat_no_unroll - uses code that does not unroll the loops in the
1965                      block calculations (much slower)
1966 .   -mat_block_size - size of the blocks to use
1967 .   -mat_mpi - use the parallel matrix data structures even on one processor
1968                (defaults to using SeqBAIJ format on one processor)
1969 
1970    Notes:
1971    The number of rows and columns must be divisible by blocksize.
1972 
1973    The user MUST specify either the local or global matrix dimensions
1974    (possibly both).
1975 
1976    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1977    than it must be used on all processors that share the object for that argument.
1978 
1979    If the *_nnz parameter is given then the *_nz parameter is ignored
1980 
1981    Storage Information:
1982    For a square global matrix we define each processor's diagonal portion
1983    to be its local rows and the corresponding columns (a square submatrix);
1984    each processor's off-diagonal portion encompasses the remainder of the
1985    local matrix (a rectangular submatrix).
1986 
1987    The user can specify preallocated storage for the diagonal part of
1988    the local submatrix with either d_nz or d_nnz (not both).  Set
1989    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1990    memory allocation.  Likewise, specify preallocated storage for the
1991    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1992 
1993    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1994    the figure below we depict these three local rows and all columns (0-11).
1995 
1996 .vb
1997            0 1 2 3 4 5 6 7 8 9 10 11
1998           -------------------
1999    row 3  |  o o o d d d o o o o o o
2000    row 4  |  o o o d d d o o o o o o
2001    row 5  |  o o o d d d o o o o o o
2002           -------------------
2003 .ve
2004 
2005    Thus, any entries in the d locations are stored in the d (diagonal)
2006    submatrix, and any entries in the o locations are stored in the
2007    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2008    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2009 
2010    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2011    plus the diagonal part of the d matrix,
2012    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2013    In general, for PDE problems in which most nonzeros are near the diagonal,
2014    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2015    or you will get TERRIBLE performance; see the users' manual chapter on
2016    matrices.
2017 
2018    Level: intermediate
2019 
2020 .keywords: matrix, block, aij, compressed row, sparse, parallel
2021 
2022 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2023 @*/
2024 
2025 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2026 {
2027   PetscErrorCode ierr;
2028   PetscMPIInt    size;
2029 
2030   PetscFunctionBegin;
2031   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2032   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2033   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2034   if (size > 1) {
2035     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2036     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2037   } else {
2038     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2039     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2040   }
2041   PetscFunctionReturn(0);
2042 }
2043 
2044 
2045 #undef __FUNCT__
2046 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2047 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2048 {
2049   Mat            mat;
2050   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2051   PetscErrorCode ierr;
2052   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
2053   PetscScalar    *array;
2054 
2055   PetscFunctionBegin;
2056   *newmat       = 0;
2057   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2058   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
2059   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2060   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2061 
2062   mat->factor       = matin->factor;
2063   mat->preallocated = PETSC_TRUE;
2064   mat->assembled    = PETSC_TRUE;
2065   mat->insertmode   = NOT_SET_VALUES;
2066 
2067   a = (Mat_MPISBAIJ*)mat->data;
2068   mat->bs  = matin->bs;
2069   a->bs2   = oldmat->bs2;
2070   a->mbs   = oldmat->mbs;
2071   a->nbs   = oldmat->nbs;
2072   a->Mbs   = oldmat->Mbs;
2073   a->Nbs   = oldmat->Nbs;
2074 
2075   a->rstart       = oldmat->rstart;
2076   a->rend         = oldmat->rend;
2077   a->cstart       = oldmat->cstart;
2078   a->cend         = oldmat->cend;
2079   a->size         = oldmat->size;
2080   a->rank         = oldmat->rank;
2081   a->donotstash   = oldmat->donotstash;
2082   a->roworiented  = oldmat->roworiented;
2083   a->rowindices   = 0;
2084   a->rowvalues    = 0;
2085   a->getrowactive = PETSC_FALSE;
2086   a->barray       = 0;
2087   a->rstart_bs    = oldmat->rstart_bs;
2088   a->rend_bs      = oldmat->rend_bs;
2089   a->cstart_bs    = oldmat->cstart_bs;
2090   a->cend_bs      = oldmat->cend_bs;
2091 
2092   /* hash table stuff */
2093   a->ht           = 0;
2094   a->hd           = 0;
2095   a->ht_size      = 0;
2096   a->ht_flag      = oldmat->ht_flag;
2097   a->ht_fact      = oldmat->ht_fact;
2098   a->ht_total_ct  = 0;
2099   a->ht_insert_ct = 0;
2100 
2101   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2102   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2103   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
2104   if (oldmat->colmap) {
2105 #if defined (PETSC_USE_CTABLE)
2106     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2107 #else
2108     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2109     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2110     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2111 #endif
2112   } else a->colmap = 0;
2113 
2114   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2115     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2116     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2117     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2118   } else a->garray = 0;
2119 
2120   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2122   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2123   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2124 
2125   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2126   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2127   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2128   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2129 
2130   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2131   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2132   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2133   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2134   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2135   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2136   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2137   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2139   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2140   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2141   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2142   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2143 
2144   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2145   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2146   a->sMvctx = oldmat->sMvctx;
2147   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2148 
2149   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2150   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2151   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2152   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2153   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2154   *newmat = mat;
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #include "petscsys.h"
2159 
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatLoad_MPISBAIJ"
2162 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2163 {
2164   Mat            A;
2165   PetscErrorCode ierr;
2166   PetscInt       i,nz,j,rstart,rend;
2167   PetscScalar    *vals,*buf;
2168   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2169   MPI_Status     status;
2170   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2171   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2172   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2173   PetscInt       bs=1,Mbs,mbs,extra_rows;
2174   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2175   PetscInt       dcount,kmax,k,nzcount,tmp;
2176   int            fd;
2177 
2178   PetscFunctionBegin;
2179   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2180 
2181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2182   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2183   if (!rank) {
2184     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2185     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2186     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2187     if (header[3] < 0) {
2188       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2189     }
2190   }
2191 
2192   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2193   M = header[1]; N = header[2];
2194 
2195   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2196 
2197   /*
2198      This code adds extra rows to make sure the number of rows is
2199      divisible by the blocksize
2200   */
2201   Mbs        = M/bs;
2202   extra_rows = bs - M + bs*(Mbs);
2203   if (extra_rows == bs) extra_rows = 0;
2204   else                  Mbs++;
2205   if (extra_rows &&!rank) {
2206     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2207   }
2208 
2209   /* determine ownership of all rows */
2210   mbs        = Mbs/size + ((Mbs % size) > rank);
2211   m          = mbs*bs;
2212   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2213   browners   = rowners + size + 1;
2214   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2215   rowners[0] = 0;
2216   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2217   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2218   rstart = rowners[rank];
2219   rend   = rowners[rank+1];
2220 
2221   /* distribute row lengths to all processors */
2222   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2223   if (!rank) {
2224     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2225     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2226     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2227     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2228     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2229     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2230     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2231   } else {
2232     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2233   }
2234 
2235   if (!rank) {   /* procs[0] */
2236     /* calculate the number of nonzeros on each processor */
2237     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2238     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2239     for (i=0; i<size; i++) {
2240       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2241         procsnz[i] += rowlengths[j];
2242       }
2243     }
2244     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2245 
2246     /* determine max buffer needed and allocate it */
2247     maxnz = 0;
2248     for (i=0; i<size; i++) {
2249       maxnz = PetscMax(maxnz,procsnz[i]);
2250     }
2251     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2252 
2253     /* read in my part of the matrix column indices  */
2254     nz     = procsnz[0];
2255     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2256     mycols = ibuf;
2257     if (size == 1)  nz -= extra_rows;
2258     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2259     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2260 
2261     /* read in every ones (except the last) and ship off */
2262     for (i=1; i<size-1; i++) {
2263       nz   = procsnz[i];
2264       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2265       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2266     }
2267     /* read in the stuff for the last proc */
2268     if (size != 1) {
2269       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2270       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2271       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2272       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2273     }
2274     ierr = PetscFree(cols);CHKERRQ(ierr);
2275   } else {  /* procs[i], i>0 */
2276     /* determine buffer space needed for message */
2277     nz = 0;
2278     for (i=0; i<m; i++) {
2279       nz += locrowlens[i];
2280     }
2281     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2282     mycols = ibuf;
2283     /* receive message of column indices*/
2284     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2285     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2286     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2287   }
2288 
2289   /* loop over local rows, determining number of off diagonal entries */
2290   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2291   odlens   = dlens + (rend-rstart);
2292   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2293   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2294   masked1  = mask    + Mbs;
2295   masked2  = masked1 + Mbs;
2296   rowcount = 0; nzcount = 0;
2297   for (i=0; i<mbs; i++) {
2298     dcount  = 0;
2299     odcount = 0;
2300     for (j=0; j<bs; j++) {
2301       kmax = locrowlens[rowcount];
2302       for (k=0; k<kmax; k++) {
2303         tmp = mycols[nzcount++]/bs; /* block col. index */
2304         if (!mask[tmp]) {
2305           mask[tmp] = 1;
2306           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2307           else masked1[dcount++] = tmp; /* entry in diag portion */
2308         }
2309       }
2310       rowcount++;
2311     }
2312 
2313     dlens[i]  = dcount;  /* d_nzz[i] */
2314     odlens[i] = odcount; /* o_nzz[i] */
2315 
2316     /* zero out the mask elements we set */
2317     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2318     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2319   }
2320 
2321   /* create our matrix */
2322   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2323   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2324   ierr = MatSetType(A,type);CHKERRQ(ierr);
2325   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2326   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2327 
2328   if (!rank) {
2329     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2330     /* read in my part of the matrix numerical values  */
2331     nz = procsnz[0];
2332     vals = buf;
2333     mycols = ibuf;
2334     if (size == 1)  nz -= extra_rows;
2335     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2336     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2337 
2338     /* insert into matrix */
2339     jj      = rstart*bs;
2340     for (i=0; i<m; i++) {
2341       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2342       mycols += locrowlens[i];
2343       vals   += locrowlens[i];
2344       jj++;
2345     }
2346 
2347     /* read in other processors (except the last one) and ship out */
2348     for (i=1; i<size-1; i++) {
2349       nz   = procsnz[i];
2350       vals = buf;
2351       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2352       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2353     }
2354     /* the last proc */
2355     if (size != 1){
2356       nz   = procsnz[i] - extra_rows;
2357       vals = buf;
2358       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2359       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2360       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2361     }
2362     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2363 
2364   } else {
2365     /* receive numeric values */
2366     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2367 
2368     /* receive message of values*/
2369     vals   = buf;
2370     mycols = ibuf;
2371     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2372     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2373     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2374 
2375     /* insert into matrix */
2376     jj      = rstart*bs;
2377     for (i=0; i<m; i++) {
2378       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2379       mycols += locrowlens[i];
2380       vals   += locrowlens[i];
2381       jj++;
2382     }
2383   }
2384 
2385   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2386   ierr = PetscFree(buf);CHKERRQ(ierr);
2387   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2388   ierr = PetscFree(rowners);CHKERRQ(ierr);
2389   ierr = PetscFree(dlens);CHKERRQ(ierr);
2390   ierr = PetscFree(mask);CHKERRQ(ierr);
2391   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   *newmat = A;
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2399 /*XXXXX@
2400    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2401 
2402    Input Parameters:
2403 .  mat  - the matrix
2404 .  fact - factor
2405 
2406    Collective on Mat
2407 
2408    Level: advanced
2409 
2410   Notes:
2411    This can also be set by the command line option: -mat_use_hash_table fact
2412 
2413 .keywords: matrix, hashtable, factor, HT
2414 
2415 .seealso: MatSetOption()
2416 @XXXXX*/
2417 
2418 
2419 #undef __FUNCT__
2420 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2421 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2422 {
2423   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2424   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2425   PetscReal      atmp;
2426   PetscReal      *work,*svalues,*rvalues;
2427   PetscErrorCode ierr;
2428   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2429   PetscMPIInt    rank,size;
2430   PetscInt       *rowners_bs,dest,count,source;
2431   PetscScalar    *va;
2432   MatScalar      *ba;
2433   MPI_Status     stat;
2434 
2435   PetscFunctionBegin;
2436   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2437   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2438 
2439   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2440   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2441 
2442   bs   = A->bs;
2443   mbs  = a->mbs;
2444   Mbs  = a->Mbs;
2445   ba   = b->a;
2446   bi   = b->i;
2447   bj   = b->j;
2448 
2449   /* find ownerships */
2450   rowners_bs = a->rowners_bs;
2451 
2452   /* each proc creates an array to be distributed */
2453   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2454   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2455 
2456   /* row_max for B */
2457   if (rank != size-1){
2458     for (i=0; i<mbs; i++) {
2459       ncols = bi[1] - bi[0]; bi++;
2460       brow  = bs*i;
2461       for (j=0; j<ncols; j++){
2462         bcol = bs*(*bj);
2463         for (kcol=0; kcol<bs; kcol++){
2464           col = bcol + kcol;                 /* local col index */
2465           col += rowners_bs[rank+1];      /* global col index */
2466           for (krow=0; krow<bs; krow++){
2467             atmp = PetscAbsScalar(*ba); ba++;
2468             row = brow + krow;    /* local row index */
2469             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2470             if (work[col] < atmp) work[col] = atmp;
2471           }
2472         }
2473         bj++;
2474       }
2475     }
2476 
2477     /* send values to its owners */
2478     for (dest=rank+1; dest<size; dest++){
2479       svalues = work + rowners_bs[dest];
2480       count   = rowners_bs[dest+1]-rowners_bs[dest];
2481       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2482     }
2483   }
2484 
2485   /* receive values */
2486   if (rank){
2487     rvalues = work;
2488     count   = rowners_bs[rank+1]-rowners_bs[rank];
2489     for (source=0; source<rank; source++){
2490       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2491       /* process values */
2492       for (i=0; i<count; i++){
2493         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2494       }
2495     }
2496   }
2497 
2498   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2499   ierr = PetscFree(work);CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatRelax_MPISBAIJ"
2505 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2506 {
2507   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2508   PetscErrorCode ierr;
2509   PetscInt       mbs=mat->mbs,bs=matin->bs;
2510   PetscScalar    *x,*b,*ptr,zero=0.0;
2511   Vec            bb1;
2512 
2513   PetscFunctionBegin;
2514   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2515   if (bs > 1)
2516     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2517 
2518   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2519     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2520       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2521       its--;
2522     }
2523 
2524     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2525     while (its--){
2526 
2527       /* lower triangular part: slvec0b = - B^T*xx */
2528       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2529 
2530       /* copy xx into slvec0a */
2531       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2532       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2533       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2534       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2535 
2536       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2537 
2538       /* copy bb into slvec1a */
2539       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2540       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2541       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2542       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2543 
2544       /* set slvec1b = 0 */
2545       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2546 
2547       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2548       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2549       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2550       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2551 
2552       /* upper triangular part: bb1 = bb1 - B*x */
2553       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2554 
2555       /* local diagonal sweep */
2556       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2557     }
2558     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2559   } else {
2560     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2561   }
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2567 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2568 {
2569   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2570   PetscErrorCode ierr;
2571   Vec            lvec1,bb1;
2572 
2573   PetscFunctionBegin;
2574   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2575   if (matin->bs > 1)
2576     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2577 
2578   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2579     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2580       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2581       its--;
2582     }
2583 
2584     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2585     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2586     while (its--){
2587       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2588 
2589       /* lower diagonal part: bb1 = bb - B^T*xx */
2590       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2591       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2592 
2593       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2594       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2595       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2596 
2597       /* upper diagonal part: bb1 = bb1 - B*x */
2598       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2599       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2600 
2601       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2602 
2603       /* diagonal sweep */
2604       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2605     }
2606     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2607     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2608   } else {
2609     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2610   }
2611   PetscFunctionReturn(0);
2612 }
2613 
2614