xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4dd6493e1221b6763b56268ef2ff000b73ff6dae)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by user is ingored.
207 */
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if(!baij->donotstash){
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if(!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If NumCol = 1 then a copy is not required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data --;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463  PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processors are either in INSERTMODE or ADDMODE */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since the values are stashed column-oriented,
608        set the roworiented flag to column oriented, and after MatSetValues()
609        restore the original flags */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any processor has disassembled, if so we must
639      also disassemble ourselfs, in order that we may reassemble. */
640   /*
641      if nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled thus we can skip this stuff
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
701       PetscFunctionReturn(0);
702     }
703   }
704 
705   if (isdraw) {
706     PetscDraw       draw;
707     PetscTruth isnull;
708     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
709     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
710   }
711 
712   if (size == 1) {
713     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
714     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
715   } else {
716     /* assemble the entire matrix onto first processor. */
717     Mat         A;
718     Mat_SeqSBAIJ *Aloc;
719     Mat_SeqBAIJ *Bloc;
720     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
721     MatScalar   *a;
722 
723     /* Should this be the same type as mat? */
724     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
727     } else {
728       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
729     }
730     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
731     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
732     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
733 
734     /* copy over the A part */
735     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
736     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
737     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
738 
739     for (i=0; i<mbs; i++) {
740       rvals[0] = bs*(baij->rstart + i);
741       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
742       for (j=ai[i]; j<ai[i+1]; j++) {
743         col = (baij->cstart+aj[j])*bs;
744         for (k=0; k<bs; k++) {
745           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
746           col++; a += bs;
747         }
748       }
749     }
750     /* copy over the B part */
751     Bloc = (Mat_SeqBAIJ*)baij->B->data;
752     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
753     for (i=0; i<mbs; i++) {
754       rvals[0] = bs*(baij->rstart + i);
755       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
756       for (j=ai[i]; j<ai[i+1]; j++) {
757         col = baij->garray[aj[j]]*bs;
758         for (k=0; k<bs; k++) {
759           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
760           col++; a += bs;
761         }
762       }
763     }
764     ierr = PetscFree(rvals);CHKERRQ(ierr);
765     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
766     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     /*
768        Everyone has to call to draw the matrix since the graphics waits are
769        synchronized across all processors that share the PetscDraw object
770     */
771     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
772     if (!rank) {
773       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
774       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
775     }
776     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
777     ierr = MatDestroy(A);CHKERRQ(ierr);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatView_MPISBAIJ"
784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
785 {
786   PetscErrorCode ierr;
787   PetscTruth     iascii,isdraw,issocket,isbinary;
788 
789   PetscFunctionBegin;
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
791   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
792   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
793   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
794   if (iascii || isdraw || issocket || isbinary) {
795     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
796   } else {
797     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatDestroy_MPISBAIJ"
804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
805 {
806   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
807   PetscErrorCode ierr;
808 
809   PetscFunctionBegin;
810 #if defined(PETSC_USE_LOG)
811   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
812 #endif
813   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
814   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
815   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
816   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
817   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
818 #if defined (PETSC_USE_CTABLE)
819   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
820 #else
821   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
822 #endif
823   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
824   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
825   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
826   if (baij->slvec0) {
827     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
828     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
829   }
830   if (baij->slvec1) {
831     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
832     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
834   }
835   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
836   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
837   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
838   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
839 #if defined(PETSC_USE_MAT_SINGLE)
840   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
841 #endif
842   ierr = PetscFree(baij);CHKERRQ(ierr);
843 
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
846   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
847   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMult_MPISBAIJ"
853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
854 {
855   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
856   PetscErrorCode ierr;
857   PetscInt       nt,mbs=a->mbs,bs=A->bs;
858   PetscScalar    *x,*from,zero=0.0;
859 
860   PetscFunctionBegin;
861   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
862   if (nt != A->n) {
863     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
864   }
865   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
866   if (nt != A->m) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
868   }
869 
870   /* diagonal part */
871   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
872   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
873 
874   /* subdiagonal part */
875   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
876 
877   /* copy x into the vec slvec0 */
878   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
879   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
880   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
881   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
882 
883   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
884   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
885   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
886 
887   /* supperdiagonal part */
888   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
896 {
897   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       nt;
900 
901   PetscFunctionBegin;
902   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
903   if (nt != A->n) {
904     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
905   }
906   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
907   if (nt != A->m) {
908     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
909   }
910 
911   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
912   /* do diagonal part */
913   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
914   /* do supperdiagonal part */
915   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
916   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
917   /* do subdiagonal part */
918   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
919   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
921 
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
928 {
929   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
930   PetscErrorCode ierr;
931   PetscInt       mbs=a->mbs,bs=A->bs;
932   PetscScalar    *x,*from,zero=0.0;
933 
934   PetscFunctionBegin;
935   /*
936   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
937   PetscSynchronizedFlush(A->comm);
938   */
939   /* diagonal part */
940   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
941   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
942 
943   /* subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
945 
946   /* copy x into the vec slvec0 */
947   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
950   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
951 
952   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
955 
956   /* supperdiagonal part */
957   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
958 
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
971   /* do diagonal part */
972   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
973   /* do supperdiagonal part */
974   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976 
977   /* do subdiagonal part */
978   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
979   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
980   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 /*
986   This only works correctly for square matrices where the subblock A->A is the
987    diagonal block
988 */
989 #undef __FUNCT__
990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
992 {
993   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
998   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatScale_MPISBAIJ"
1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1005 {
1006   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1011   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1018 {
1019   PetscFunctionBegin;
1020   if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1027 {
1028   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1029 
1030   PetscFunctionBegin;
1031   if (!baij->getrowactive) {
1032     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1033   }
1034   baij->getrowactive = PETSC_FALSE;
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1040 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1041 {
1042   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1047   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1053 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1054 {
1055   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1056   Mat            A = a->A,B = a->B;
1057   PetscErrorCode ierr;
1058   PetscReal      isend[5],irecv[5];
1059 
1060   PetscFunctionBegin;
1061   info->block_size     = (PetscReal)matin->bs;
1062   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1063   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1064   isend[3] = info->memory;  isend[4] = info->mallocs;
1065   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1066   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1067   isend[3] += info->memory;  isend[4] += info->mallocs;
1068   if (flag == MAT_LOCAL) {
1069     info->nz_used      = isend[0];
1070     info->nz_allocated = isend[1];
1071     info->nz_unneeded  = isend[2];
1072     info->memory       = isend[3];
1073     info->mallocs      = isend[4];
1074   } else if (flag == MAT_GLOBAL_MAX) {
1075     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1076     info->nz_used      = irecv[0];
1077     info->nz_allocated = irecv[1];
1078     info->nz_unneeded  = irecv[2];
1079     info->memory       = irecv[3];
1080     info->mallocs      = irecv[4];
1081   } else if (flag == MAT_GLOBAL_SUM) {
1082     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1083     info->nz_used      = irecv[0];
1084     info->nz_allocated = irecv[1];
1085     info->nz_unneeded  = irecv[2];
1086     info->memory       = irecv[3];
1087     info->mallocs      = irecv[4];
1088   } else {
1089     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1090   }
1091   info->rows_global       = (PetscReal)A->M;
1092   info->columns_global    = (PetscReal)A->N;
1093   info->rows_local        = (PetscReal)A->m;
1094   info->columns_local     = (PetscReal)A->N;
1095   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1096   info->fill_ratio_needed = 0;
1097   info->factor_mallocs    = 0;
1098   PetscFunctionReturn(0);
1099 }
1100 
1101 #undef __FUNCT__
1102 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1103 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1104 {
1105   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1106   PetscErrorCode ierr;
1107 
1108   PetscFunctionBegin;
1109   switch (op) {
1110   case MAT_NO_NEW_NONZERO_LOCATIONS:
1111   case MAT_YES_NEW_NONZERO_LOCATIONS:
1112   case MAT_COLUMNS_UNSORTED:
1113   case MAT_COLUMNS_SORTED:
1114   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1115   case MAT_KEEP_ZEROED_ROWS:
1116   case MAT_NEW_NONZERO_LOCATION_ERR:
1117     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1118     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1119     break;
1120   case MAT_ROW_ORIENTED:
1121     a->roworiented = PETSC_TRUE;
1122     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1123     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1124     break;
1125   case MAT_ROWS_SORTED:
1126   case MAT_ROWS_UNSORTED:
1127   case MAT_YES_NEW_DIAGONALS:
1128     ierr = PetscLogInfo((A,"Info:MatSetOption_MPIBAIJ:Option ignored\n"));CHKERRQ(ierr);
1129     break;
1130   case MAT_COLUMN_ORIENTED:
1131     a->roworiented = PETSC_FALSE;
1132     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1133     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1134     break;
1135   case MAT_IGNORE_OFF_PROC_ENTRIES:
1136     a->donotstash = PETSC_TRUE;
1137     break;
1138   case MAT_NO_NEW_DIAGONALS:
1139     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1140   case MAT_USE_HASH_TABLE:
1141     a->ht_flag = PETSC_TRUE;
1142     break;
1143   case MAT_NOT_SYMMETRIC:
1144   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1145   case MAT_HERMITIAN:
1146     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1147   case MAT_SYMMETRIC:
1148   case MAT_STRUCTURALLY_SYMMETRIC:
1149   case MAT_NOT_HERMITIAN:
1150   case MAT_SYMMETRY_ETERNAL:
1151   case MAT_NOT_SYMMETRY_ETERNAL:
1152     break;
1153   default:
1154     SETERRQ(PETSC_ERR_SUP,"unknown option");
1155   }
1156   PetscFunctionReturn(0);
1157 }
1158 
1159 #undef __FUNCT__
1160 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1161 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1162 {
1163   PetscErrorCode ierr;
1164   PetscFunctionBegin;
1165   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1166   PetscFunctionReturn(0);
1167 }
1168 
1169 #undef __FUNCT__
1170 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1171 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1172 {
1173   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1174   Mat            a=baij->A, b=baij->B;
1175   PetscErrorCode ierr;
1176   PetscInt       nv,m,n;
1177   PetscTruth     flg;
1178 
1179   PetscFunctionBegin;
1180   if (ll != rr){
1181     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1182     if (!flg)
1183       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1184   }
1185   if (!ll) PetscFunctionReturn(0);
1186 
1187   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1188   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1189 
1190   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1191   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1192 
1193   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1194 
1195   /* left diagonalscale the off-diagonal part */
1196   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1197 
1198   /* scale the diagonal part */
1199   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1200 
1201   /* right diagonalscale the off-diagonal part */
1202   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1203   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1204   PetscFunctionReturn(0);
1205 }
1206 
1207 #undef __FUNCT__
1208 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1209 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1210 {
1211   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1212   MPI_Comm          comm = A->comm;
1213   static PetscTruth called = PETSC_FALSE;
1214   PetscErrorCode    ierr;
1215 
1216   PetscFunctionBegin;
1217   if (!a->rank) {
1218     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1219   }
1220   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1221   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1222   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1223   PetscFunctionReturn(0);
1224 }
1225 
1226 #undef __FUNCT__
1227 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1228 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1229 {
1230   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1231   PetscErrorCode ierr;
1232 
1233   PetscFunctionBegin;
1234   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1235   PetscFunctionReturn(0);
1236 }
1237 
1238 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1239 
1240 #undef __FUNCT__
1241 #define __FUNCT__ "MatEqual_MPISBAIJ"
1242 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1243 {
1244   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1245   Mat            a,b,c,d;
1246   PetscTruth     flg;
1247   PetscErrorCode ierr;
1248 
1249   PetscFunctionBegin;
1250   a = matA->A; b = matA->B;
1251   c = matB->A; d = matB->B;
1252 
1253   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1254   if (flg) {
1255     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1256   }
1257   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1258   PetscFunctionReturn(0);
1259 }
1260 
1261 #undef __FUNCT__
1262 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1263 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1264 {
1265   PetscErrorCode ierr;
1266 
1267   PetscFunctionBegin;
1268   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1269   PetscFunctionReturn(0);
1270 }
1271 
1272 #undef __FUNCT__
1273 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1274 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1275 {
1276   PetscErrorCode ierr;
1277   PetscInt       i;
1278   PetscTruth     flg;
1279 
1280   PetscFunctionBegin;
1281   for (i=0; i<n; i++) {
1282     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1283     if (!flg) {
1284       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1285     }
1286   }
1287   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1288   PetscFunctionReturn(0);
1289 }
1290 
1291 
1292 /* -------------------------------------------------------------------*/
1293 static struct _MatOps MatOps_Values = {
1294        MatSetValues_MPISBAIJ,
1295        MatGetRow_MPISBAIJ,
1296        MatRestoreRow_MPISBAIJ,
1297        MatMult_MPISBAIJ,
1298 /* 4*/ MatMultAdd_MPISBAIJ,
1299        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1300        MatMultAdd_MPISBAIJ,
1301        0,
1302        0,
1303        0,
1304 /*10*/ 0,
1305        0,
1306        0,
1307        MatRelax_MPISBAIJ,
1308        MatTranspose_MPISBAIJ,
1309 /*15*/ MatGetInfo_MPISBAIJ,
1310        MatEqual_MPISBAIJ,
1311        MatGetDiagonal_MPISBAIJ,
1312        MatDiagonalScale_MPISBAIJ,
1313        MatNorm_MPISBAIJ,
1314 /*20*/ MatAssemblyBegin_MPISBAIJ,
1315        MatAssemblyEnd_MPISBAIJ,
1316        0,
1317        MatSetOption_MPISBAIJ,
1318        MatZeroEntries_MPISBAIJ,
1319 /*25*/ 0,
1320        0,
1321        0,
1322        0,
1323        0,
1324 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1325        0,
1326        0,
1327        0,
1328        0,
1329 /*35*/ MatDuplicate_MPISBAIJ,
1330        0,
1331        0,
1332        0,
1333        0,
1334 /*40*/ 0,
1335        MatGetSubMatrices_MPISBAIJ,
1336        MatIncreaseOverlap_MPISBAIJ,
1337        MatGetValues_MPISBAIJ,
1338        0,
1339 /*45*/ MatPrintHelp_MPISBAIJ,
1340        MatScale_MPISBAIJ,
1341        0,
1342        0,
1343        0,
1344 /*50*/ 0,
1345        0,
1346        0,
1347        0,
1348        0,
1349 /*55*/ 0,
1350        0,
1351        MatSetUnfactored_MPISBAIJ,
1352        0,
1353        MatSetValuesBlocked_MPISBAIJ,
1354 /*60*/ 0,
1355        0,
1356        0,
1357        MatGetPetscMaps_Petsc,
1358        0,
1359 /*65*/ 0,
1360        0,
1361        0,
1362        0,
1363        0,
1364 /*70*/ MatGetRowMax_MPISBAIJ,
1365        0,
1366        0,
1367        0,
1368        0,
1369 /*75*/ 0,
1370        0,
1371        0,
1372        0,
1373        0,
1374 /*80*/ 0,
1375        0,
1376        0,
1377        0,
1378        MatLoad_MPISBAIJ,
1379 /*85*/ 0,
1380        0,
1381        0,
1382        0,
1383        0,
1384 /*90*/ 0,
1385        0,
1386        0,
1387        0,
1388        0,
1389 /*95*/ 0,
1390        0,
1391        0,
1392        0};
1393 
1394 
1395 EXTERN_C_BEGIN
1396 #undef __FUNCT__
1397 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1398 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1399 {
1400   PetscFunctionBegin;
1401   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1402   *iscopy = PETSC_FALSE;
1403   PetscFunctionReturn(0);
1404 }
1405 EXTERN_C_END
1406 
1407 EXTERN_C_BEGIN
1408 #undef __FUNCT__
1409 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1410 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1411 {
1412   Mat_MPISBAIJ   *b;
1413   PetscErrorCode ierr;
1414   PetscInt       i,mbs,Mbs;
1415 
1416   PetscFunctionBegin;
1417   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1418 
1419   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1420   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1421   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1422   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1423   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1424   if (d_nnz) {
1425     for (i=0; i<B->m/bs; i++) {
1426       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1427     }
1428   }
1429   if (o_nnz) {
1430     for (i=0; i<B->m/bs; i++) {
1431       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1432     }
1433   }
1434   B->preallocated = PETSC_TRUE;
1435   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1436   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1437   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1438   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1439 
1440   b   = (Mat_MPISBAIJ*)B->data;
1441   mbs = B->m/bs;
1442   Mbs = B->M/bs;
1443   if (mbs*bs != B->m) {
1444     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->m,bs);
1445   }
1446 
1447   B->bs  = bs;
1448   b->bs2 = bs*bs;
1449   b->mbs = mbs;
1450   b->nbs = mbs;
1451   b->Mbs = Mbs;
1452   b->Nbs = Mbs;
1453 
1454   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1455   b->rowners[0]    = 0;
1456   for (i=2; i<=b->size; i++) {
1457     b->rowners[i] += b->rowners[i-1];
1458   }
1459   b->rstart    = b->rowners[b->rank];
1460   b->rend      = b->rowners[b->rank+1];
1461   b->cstart    = b->rstart;
1462   b->cend      = b->rend;
1463   for (i=0; i<=b->size; i++) {
1464     b->rowners_bs[i] = b->rowners[i]*bs;
1465   }
1466   b->rstart_bs = b-> rstart*bs;
1467   b->rend_bs   = b->rend*bs;
1468 
1469   b->cstart_bs = b->cstart*bs;
1470   b->cend_bs   = b->cend*bs;
1471 
1472   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1473   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1474   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1475   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1476   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1477 
1478   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1479   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1480   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1481   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1482   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1483 
1484   /* build cache for off array entries formed */
1485   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1486 
1487   PetscFunctionReturn(0);
1488 }
1489 EXTERN_C_END
1490 
1491 /*MC
1492    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1493    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1494 
1495    Options Database Keys:
1496 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1497 
1498   Level: beginner
1499 
1500 .seealso: MatCreateMPISBAIJ
1501 M*/
1502 
1503 EXTERN_C_BEGIN
1504 #undef __FUNCT__
1505 #define __FUNCT__ "MatCreate_MPISBAIJ"
1506 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1507 {
1508   Mat_MPISBAIJ   *b;
1509   PetscErrorCode ierr;
1510   PetscTruth     flg;
1511 
1512   PetscFunctionBegin;
1513 
1514   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1515   B->data = (void*)b;
1516   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1517 
1518   B->ops->destroy    = MatDestroy_MPISBAIJ;
1519   B->ops->view       = MatView_MPISBAIJ;
1520   B->mapping    = 0;
1521   B->factor     = 0;
1522   B->assembled  = PETSC_FALSE;
1523 
1524   B->insertmode = NOT_SET_VALUES;
1525   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1526   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1527 
1528   /* build local table of row and column ownerships */
1529   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1530   b->cowners    = b->rowners + b->size + 2;
1531   b->rowners_bs = b->cowners + b->size + 2;
1532   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1533 
1534   /* build cache for off array entries formed */
1535   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1536   b->donotstash  = PETSC_FALSE;
1537   b->colmap      = PETSC_NULL;
1538   b->garray      = PETSC_NULL;
1539   b->roworiented = PETSC_TRUE;
1540 
1541 #if defined(PETSC_USE_MAT_SINGLE)
1542   /* stuff for MatSetValues_XXX in single precision */
1543   b->setvalueslen     = 0;
1544   b->setvaluescopy    = PETSC_NULL;
1545 #endif
1546 
1547   /* stuff used in block assembly */
1548   b->barray       = 0;
1549 
1550   /* stuff used for matrix vector multiply */
1551   b->lvec         = 0;
1552   b->Mvctx        = 0;
1553   b->slvec0       = 0;
1554   b->slvec0b      = 0;
1555   b->slvec1       = 0;
1556   b->slvec1a      = 0;
1557   b->slvec1b      = 0;
1558   b->sMvctx       = 0;
1559 
1560   /* stuff for MatGetRow() */
1561   b->rowindices   = 0;
1562   b->rowvalues    = 0;
1563   b->getrowactive = PETSC_FALSE;
1564 
1565   /* hash table stuff */
1566   b->ht           = 0;
1567   b->hd           = 0;
1568   b->ht_size      = 0;
1569   b->ht_flag      = PETSC_FALSE;
1570   b->ht_fact      = 0;
1571   b->ht_total_ct  = 0;
1572   b->ht_insert_ct = 0;
1573 
1574   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1575   if (flg) {
1576     PetscReal fact = 1.39;
1577     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1578     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1579     if (fact <= 1.0) fact = 1.39;
1580     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1581     ierr = PetscLogInfo((0,"MatCreateMPISBAIJ:Hash table Factor used %5.2f\n",fact));CHKERRQ(ierr);
1582   }
1583   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1584                                      "MatStoreValues_MPISBAIJ",
1585                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1586   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1587                                      "MatRetrieveValues_MPISBAIJ",
1588                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1589   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1590                                      "MatGetDiagonalBlock_MPISBAIJ",
1591                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1592   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1593                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1594                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1595   B->symmetric                  = PETSC_TRUE;
1596   B->structurally_symmetric     = PETSC_TRUE;
1597   B->symmetric_set              = PETSC_TRUE;
1598   B->structurally_symmetric_set = PETSC_TRUE;
1599   PetscFunctionReturn(0);
1600 }
1601 EXTERN_C_END
1602 
1603 /*MC
1604    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1605 
1606    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1607    and MATMPISBAIJ otherwise.
1608 
1609    Options Database Keys:
1610 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1611 
1612   Level: beginner
1613 
1614 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1615 M*/
1616 
1617 EXTERN_C_BEGIN
1618 #undef __FUNCT__
1619 #define __FUNCT__ "MatCreate_SBAIJ"
1620 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1621 {
1622   PetscErrorCode ierr;
1623   PetscMPIInt    size;
1624 
1625   PetscFunctionBegin;
1626   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1627   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1628   if (size == 1) {
1629     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1630   } else {
1631     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1632   }
1633   PetscFunctionReturn(0);
1634 }
1635 EXTERN_C_END
1636 
1637 #undef __FUNCT__
1638 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1639 /*@C
1640    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1641    the user should preallocate the matrix storage by setting the parameters
1642    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1643    performance can be increased by more than a factor of 50.
1644 
1645    Collective on Mat
1646 
1647    Input Parameters:
1648 +  A - the matrix
1649 .  bs   - size of blockk
1650 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1651            submatrix  (same for all local rows)
1652 .  d_nnz - array containing the number of block nonzeros in the various block rows
1653            in the upper triangular and diagonal part of the in diagonal portion of the local
1654            (possibly different for each block row) or PETSC_NULL.  You must leave room
1655            for the diagonal entry even if it is zero.
1656 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1657            submatrix (same for all local rows).
1658 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1659            off-diagonal portion of the local submatrix (possibly different for
1660            each block row) or PETSC_NULL.
1661 
1662 
1663    Options Database Keys:
1664 .   -mat_no_unroll - uses code that does not unroll the loops in the
1665                      block calculations (much slower)
1666 .   -mat_block_size - size of the blocks to use
1667 
1668    Notes:
1669 
1670    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1671    than it must be used on all processors that share the object for that argument.
1672 
1673    If the *_nnz parameter is given then the *_nz parameter is ignored
1674 
1675    Storage Information:
1676    For a square global matrix we define each processor's diagonal portion
1677    to be its local rows and the corresponding columns (a square submatrix);
1678    each processor's off-diagonal portion encompasses the remainder of the
1679    local matrix (a rectangular submatrix).
1680 
1681    The user can specify preallocated storage for the diagonal part of
1682    the local submatrix with either d_nz or d_nnz (not both).  Set
1683    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1684    memory allocation.  Likewise, specify preallocated storage for the
1685    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1686 
1687    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1688    the figure below we depict these three local rows and all columns (0-11).
1689 
1690 .vb
1691            0 1 2 3 4 5 6 7 8 9 10 11
1692           -------------------
1693    row 3  |  o o o d d d o o o o o o
1694    row 4  |  o o o d d d o o o o o o
1695    row 5  |  o o o d d d o o o o o o
1696           -------------------
1697 .ve
1698 
1699    Thus, any entries in the d locations are stored in the d (diagonal)
1700    submatrix, and any entries in the o locations are stored in the
1701    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1702    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1703 
1704    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1705    plus the diagonal part of the d matrix,
1706    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1707    In general, for PDE problems in which most nonzeros are near the diagonal,
1708    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1709    or you will get TERRIBLE performance; see the users' manual chapter on
1710    matrices.
1711 
1712    Level: intermediate
1713 
1714 .keywords: matrix, block, aij, compressed row, sparse, parallel
1715 
1716 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1717 @*/
1718 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1719 {
1720   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1721 
1722   PetscFunctionBegin;
1723   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1724   if (f) {
1725     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1726   }
1727   PetscFunctionReturn(0);
1728 }
1729 
1730 #undef __FUNCT__
1731 #define __FUNCT__ "MatCreateMPISBAIJ"
1732 /*@C
1733    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1734    (block compressed row).  For good matrix assembly performance
1735    the user should preallocate the matrix storage by setting the parameters
1736    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1737    performance can be increased by more than a factor of 50.
1738 
1739    Collective on MPI_Comm
1740 
1741    Input Parameters:
1742 +  comm - MPI communicator
1743 .  bs   - size of blockk
1744 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1745            This value should be the same as the local size used in creating the
1746            y vector for the matrix-vector product y = Ax.
1747 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1748            This value should be the same as the local size used in creating the
1749            x vector for the matrix-vector product y = Ax.
1750 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1751 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1752 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1753            submatrix  (same for all local rows)
1754 .  d_nnz - array containing the number of block nonzeros in the various block rows
1755            in the upper triangular portion of the in diagonal portion of the local
1756            (possibly different for each block block row) or PETSC_NULL.
1757            You must leave room for the diagonal entry even if it is zero.
1758 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1759            submatrix (same for all local rows).
1760 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1761            off-diagonal portion of the local submatrix (possibly different for
1762            each block row) or PETSC_NULL.
1763 
1764    Output Parameter:
1765 .  A - the matrix
1766 
1767    Options Database Keys:
1768 .   -mat_no_unroll - uses code that does not unroll the loops in the
1769                      block calculations (much slower)
1770 .   -mat_block_size - size of the blocks to use
1771 .   -mat_mpi - use the parallel matrix data structures even on one processor
1772                (defaults to using SeqBAIJ format on one processor)
1773 
1774    Notes:
1775    The number of rows and columns must be divisible by blocksize.
1776 
1777    The user MUST specify either the local or global matrix dimensions
1778    (possibly both).
1779 
1780    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1781    than it must be used on all processors that share the object for that argument.
1782 
1783    If the *_nnz parameter is given then the *_nz parameter is ignored
1784 
1785    Storage Information:
1786    For a square global matrix we define each processor's diagonal portion
1787    to be its local rows and the corresponding columns (a square submatrix);
1788    each processor's off-diagonal portion encompasses the remainder of the
1789    local matrix (a rectangular submatrix).
1790 
1791    The user can specify preallocated storage for the diagonal part of
1792    the local submatrix with either d_nz or d_nnz (not both).  Set
1793    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1794    memory allocation.  Likewise, specify preallocated storage for the
1795    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1796 
1797    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1798    the figure below we depict these three local rows and all columns (0-11).
1799 
1800 .vb
1801            0 1 2 3 4 5 6 7 8 9 10 11
1802           -------------------
1803    row 3  |  o o o d d d o o o o o o
1804    row 4  |  o o o d d d o o o o o o
1805    row 5  |  o o o d d d o o o o o o
1806           -------------------
1807 .ve
1808 
1809    Thus, any entries in the d locations are stored in the d (diagonal)
1810    submatrix, and any entries in the o locations are stored in the
1811    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1812    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1813 
1814    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1815    plus the diagonal part of the d matrix,
1816    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1817    In general, for PDE problems in which most nonzeros are near the diagonal,
1818    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1819    or you will get TERRIBLE performance; see the users' manual chapter on
1820    matrices.
1821 
1822    Level: intermediate
1823 
1824 .keywords: matrix, block, aij, compressed row, sparse, parallel
1825 
1826 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1827 @*/
1828 
1829 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1830 {
1831   PetscErrorCode ierr;
1832   PetscMPIInt    size;
1833 
1834   PetscFunctionBegin;
1835   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1836   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1837   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1838   if (size > 1) {
1839     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1840     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1841   } else {
1842     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1843     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1844   }
1845   PetscFunctionReturn(0);
1846 }
1847 
1848 
1849 #undef __FUNCT__
1850 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1851 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1852 {
1853   Mat            mat;
1854   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1855   PetscErrorCode ierr;
1856   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1857   PetscScalar    *array;
1858 
1859   PetscFunctionBegin;
1860   *newmat       = 0;
1861   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1862   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1863   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1864   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1865 
1866   mat->factor       = matin->factor;
1867   mat->preallocated = PETSC_TRUE;
1868   mat->assembled    = PETSC_TRUE;
1869   mat->insertmode   = NOT_SET_VALUES;
1870 
1871   a = (Mat_MPISBAIJ*)mat->data;
1872   mat->bs  = matin->bs;
1873   a->bs2   = oldmat->bs2;
1874   a->mbs   = oldmat->mbs;
1875   a->nbs   = oldmat->nbs;
1876   a->Mbs   = oldmat->Mbs;
1877   a->Nbs   = oldmat->Nbs;
1878 
1879   a->rstart       = oldmat->rstart;
1880   a->rend         = oldmat->rend;
1881   a->cstart       = oldmat->cstart;
1882   a->cend         = oldmat->cend;
1883   a->size         = oldmat->size;
1884   a->rank         = oldmat->rank;
1885   a->donotstash   = oldmat->donotstash;
1886   a->roworiented  = oldmat->roworiented;
1887   a->rowindices   = 0;
1888   a->rowvalues    = 0;
1889   a->getrowactive = PETSC_FALSE;
1890   a->barray       = 0;
1891   a->rstart_bs    = oldmat->rstart_bs;
1892   a->rend_bs      = oldmat->rend_bs;
1893   a->cstart_bs    = oldmat->cstart_bs;
1894   a->cend_bs      = oldmat->cend_bs;
1895 
1896   /* hash table stuff */
1897   a->ht           = 0;
1898   a->hd           = 0;
1899   a->ht_size      = 0;
1900   a->ht_flag      = oldmat->ht_flag;
1901   a->ht_fact      = oldmat->ht_fact;
1902   a->ht_total_ct  = 0;
1903   a->ht_insert_ct = 0;
1904 
1905   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1906   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1907   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1908   if (oldmat->colmap) {
1909 #if defined (PETSC_USE_CTABLE)
1910     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1911 #else
1912     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1913     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1914     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1915 #endif
1916   } else a->colmap = 0;
1917 
1918   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1919     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1920     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1921     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1922   } else a->garray = 0;
1923 
1924   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1925   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1926   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1927   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1928 
1929   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1930   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1931   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1932   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1933 
1934   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1935   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1936   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1937   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1938   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1939   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1940   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1941   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1942   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1943   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1944   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
1945   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
1946   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
1947 
1948   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
1949   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1950   a->sMvctx = oldmat->sMvctx;
1951   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
1952 
1953   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1954   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1955   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1956   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1957   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
1958   *newmat = mat;
1959   PetscFunctionReturn(0);
1960 }
1961 
1962 #include "petscsys.h"
1963 
1964 #undef __FUNCT__
1965 #define __FUNCT__ "MatLoad_MPISBAIJ"
1966 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
1967 {
1968   Mat            A;
1969   PetscErrorCode ierr;
1970   PetscInt       i,nz,j,rstart,rend;
1971   PetscScalar    *vals,*buf;
1972   MPI_Comm       comm = ((PetscObject)viewer)->comm;
1973   MPI_Status     status;
1974   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
1975   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
1976   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
1977   PetscInt       bs=1,Mbs,mbs,extra_rows;
1978   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
1979   PetscInt       dcount,kmax,k,nzcount,tmp;
1980   int            fd;
1981 
1982   PetscFunctionBegin;
1983   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1984 
1985   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1986   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
1987   if (!rank) {
1988     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
1989     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
1990     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
1991     if (header[3] < 0) {
1992       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
1993     }
1994   }
1995 
1996   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
1997   M = header[1]; N = header[2];
1998 
1999   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2000 
2001   /*
2002      This code adds extra rows to make sure the number of rows is
2003      divisible by the blocksize
2004   */
2005   Mbs        = M/bs;
2006   extra_rows = bs - M + bs*(Mbs);
2007   if (extra_rows == bs) extra_rows = 0;
2008   else                  Mbs++;
2009   if (extra_rows &&!rank) {
2010     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2011   }
2012 
2013   /* determine ownership of all rows */
2014   mbs        = Mbs/size + ((Mbs % size) > rank);
2015   m          = mbs*bs;
2016   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2017   browners   = rowners + size + 1;
2018   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2019   rowners[0] = 0;
2020   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2021   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2022   rstart = rowners[rank];
2023   rend   = rowners[rank+1];
2024 
2025   /* distribute row lengths to all processors */
2026   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2027   if (!rank) {
2028     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2029     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2030     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2031     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2032     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2033     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2034     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2035   } else {
2036     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2037   }
2038 
2039   if (!rank) {   /* procs[0] */
2040     /* calculate the number of nonzeros on each processor */
2041     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2042     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2043     for (i=0; i<size; i++) {
2044       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2045         procsnz[i] += rowlengths[j];
2046       }
2047     }
2048     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2049 
2050     /* determine max buffer needed and allocate it */
2051     maxnz = 0;
2052     for (i=0; i<size; i++) {
2053       maxnz = PetscMax(maxnz,procsnz[i]);
2054     }
2055     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2056 
2057     /* read in my part of the matrix column indices  */
2058     nz     = procsnz[0];
2059     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2060     mycols = ibuf;
2061     if (size == 1)  nz -= extra_rows;
2062     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2063     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2064 
2065     /* read in every ones (except the last) and ship off */
2066     for (i=1; i<size-1; i++) {
2067       nz   = procsnz[i];
2068       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2069       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2070     }
2071     /* read in the stuff for the last proc */
2072     if (size != 1) {
2073       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2074       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2075       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2076       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2077     }
2078     ierr = PetscFree(cols);CHKERRQ(ierr);
2079   } else {  /* procs[i], i>0 */
2080     /* determine buffer space needed for message */
2081     nz = 0;
2082     for (i=0; i<m; i++) {
2083       nz += locrowlens[i];
2084     }
2085     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2086     mycols = ibuf;
2087     /* receive message of column indices*/
2088     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2089     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2090     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2091   }
2092 
2093   /* loop over local rows, determining number of off diagonal entries */
2094   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2095   odlens   = dlens + (rend-rstart);
2096   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2097   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2098   masked1  = mask    + Mbs;
2099   masked2  = masked1 + Mbs;
2100   rowcount = 0; nzcount = 0;
2101   for (i=0; i<mbs; i++) {
2102     dcount  = 0;
2103     odcount = 0;
2104     for (j=0; j<bs; j++) {
2105       kmax = locrowlens[rowcount];
2106       for (k=0; k<kmax; k++) {
2107         tmp = mycols[nzcount++]/bs; /* block col. index */
2108         if (!mask[tmp]) {
2109           mask[tmp] = 1;
2110           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2111           else masked1[dcount++] = tmp; /* entry in diag portion */
2112         }
2113       }
2114       rowcount++;
2115     }
2116 
2117     dlens[i]  = dcount;  /* d_nzz[i] */
2118     odlens[i] = odcount; /* o_nzz[i] */
2119 
2120     /* zero out the mask elements we set */
2121     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2122     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2123   }
2124 
2125   /* create our matrix */
2126   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2127   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2128   ierr = MatSetType(A,type);CHKERRQ(ierr);
2129   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2130   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2131 
2132   if (!rank) {
2133     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2134     /* read in my part of the matrix numerical values  */
2135     nz = procsnz[0];
2136     vals = buf;
2137     mycols = ibuf;
2138     if (size == 1)  nz -= extra_rows;
2139     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2140     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2141 
2142     /* insert into matrix */
2143     jj      = rstart*bs;
2144     for (i=0; i<m; i++) {
2145       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2146       mycols += locrowlens[i];
2147       vals   += locrowlens[i];
2148       jj++;
2149     }
2150 
2151     /* read in other processors (except the last one) and ship out */
2152     for (i=1; i<size-1; i++) {
2153       nz   = procsnz[i];
2154       vals = buf;
2155       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2156       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2157     }
2158     /* the last proc */
2159     if (size != 1){
2160       nz   = procsnz[i] - extra_rows;
2161       vals = buf;
2162       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2163       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2164       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2165     }
2166     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2167 
2168   } else {
2169     /* receive numeric values */
2170     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2171 
2172     /* receive message of values*/
2173     vals   = buf;
2174     mycols = ibuf;
2175     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2176     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2177     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2178 
2179     /* insert into matrix */
2180     jj      = rstart*bs;
2181     for (i=0; i<m; i++) {
2182       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2183       mycols += locrowlens[i];
2184       vals   += locrowlens[i];
2185       jj++;
2186     }
2187   }
2188 
2189   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2190   ierr = PetscFree(buf);CHKERRQ(ierr);
2191   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2192   ierr = PetscFree(rowners);CHKERRQ(ierr);
2193   ierr = PetscFree(dlens);CHKERRQ(ierr);
2194   ierr = PetscFree(mask);CHKERRQ(ierr);
2195   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2196   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2197   *newmat = A;
2198   PetscFunctionReturn(0);
2199 }
2200 
2201 #undef __FUNCT__
2202 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2203 /*XXXXX@
2204    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2205 
2206    Input Parameters:
2207 .  mat  - the matrix
2208 .  fact - factor
2209 
2210    Collective on Mat
2211 
2212    Level: advanced
2213 
2214   Notes:
2215    This can also be set by the command line option: -mat_use_hash_table fact
2216 
2217 .keywords: matrix, hashtable, factor, HT
2218 
2219 .seealso: MatSetOption()
2220 @XXXXX*/
2221 
2222 
2223 #undef __FUNCT__
2224 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2225 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2226 {
2227   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2228   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2229   PetscReal      atmp;
2230   PetscReal      *work,*svalues,*rvalues;
2231   PetscErrorCode ierr;
2232   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2233   PetscMPIInt    rank,size;
2234   PetscInt       *rowners_bs,dest,count,source;
2235   PetscScalar    *va;
2236   MatScalar      *ba;
2237   MPI_Status     stat;
2238 
2239   PetscFunctionBegin;
2240   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2241   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2242 
2243   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2244   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2245 
2246   bs   = A->bs;
2247   mbs  = a->mbs;
2248   Mbs  = a->Mbs;
2249   ba   = b->a;
2250   bi   = b->i;
2251   bj   = b->j;
2252 
2253   /* find ownerships */
2254   rowners_bs = a->rowners_bs;
2255 
2256   /* each proc creates an array to be distributed */
2257   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2258   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2259 
2260   /* row_max for B */
2261   if (rank != size-1){
2262     for (i=0; i<mbs; i++) {
2263       ncols = bi[1] - bi[0]; bi++;
2264       brow  = bs*i;
2265       for (j=0; j<ncols; j++){
2266         bcol = bs*(*bj);
2267         for (kcol=0; kcol<bs; kcol++){
2268           col = bcol + kcol;                 /* local col index */
2269           col += rowners_bs[rank+1];      /* global col index */
2270           for (krow=0; krow<bs; krow++){
2271             atmp = PetscAbsScalar(*ba); ba++;
2272             row = brow + krow;    /* local row index */
2273             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2274             if (work[col] < atmp) work[col] = atmp;
2275           }
2276         }
2277         bj++;
2278       }
2279     }
2280 
2281     /* send values to its owners */
2282     for (dest=rank+1; dest<size; dest++){
2283       svalues = work + rowners_bs[dest];
2284       count   = rowners_bs[dest+1]-rowners_bs[dest];
2285       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2286     }
2287   }
2288 
2289   /* receive values */
2290   if (rank){
2291     rvalues = work;
2292     count   = rowners_bs[rank+1]-rowners_bs[rank];
2293     for (source=0; source<rank; source++){
2294       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2295       /* process values */
2296       for (i=0; i<count; i++){
2297         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2298       }
2299     }
2300   }
2301 
2302   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2303   ierr = PetscFree(work);CHKERRQ(ierr);
2304   PetscFunctionReturn(0);
2305 }
2306 
2307 #undef __FUNCT__
2308 #define __FUNCT__ "MatRelax_MPISBAIJ"
2309 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2310 {
2311   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2312   PetscErrorCode ierr;
2313   PetscInt       mbs=mat->mbs,bs=matin->bs;
2314   PetscScalar    *x,*b,*ptr,zero=0.0;
2315   Vec            bb1;
2316 
2317   PetscFunctionBegin;
2318   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2319   if (bs > 1)
2320     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2321 
2322   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2323     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2324       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2325       its--;
2326     }
2327 
2328     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2329     while (its--){
2330 
2331       /* lower triangular part: slvec0b = - B^T*xx */
2332       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2333 
2334       /* copy xx into slvec0a */
2335       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2336       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2337       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2338       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2339 
2340       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2341 
2342       /* copy bb into slvec1a */
2343       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2344       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2345       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2346       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2347 
2348       /* set slvec1b = 0 */
2349       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2350 
2351       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2352       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2353       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2354       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2355 
2356       /* upper triangular part: bb1 = bb1 - B*x */
2357       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2358 
2359       /* local diagonal sweep */
2360       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2361     }
2362     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2363   } else {
2364     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2365   }
2366   PetscFunctionReturn(0);
2367 }
2368 
2369 #undef __FUNCT__
2370 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2371 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2372 {
2373   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2374   PetscErrorCode ierr;
2375   Vec            lvec1,bb1;
2376 
2377   PetscFunctionBegin;
2378   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2379   if (matin->bs > 1)
2380     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2381 
2382   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2383     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2384       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2385       its--;
2386     }
2387 
2388     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2389     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2390     while (its--){
2391       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2392 
2393       /* lower diagonal part: bb1 = bb - B^T*xx */
2394       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2395       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2396 
2397       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2398       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2399       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2400 
2401       /* upper diagonal part: bb1 = bb1 - B*x */
2402       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2403       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2404 
2405       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2406 
2407       /* diagonal sweep */
2408       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2409     }
2410     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2411     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2412   } else {
2413     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2414   }
2415   PetscFunctionReturn(0);
2416 }
2417 
2418