xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 4e78640e53e28f1a17004e3585001bcd87aef9bf)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist; the macros below simply map the ..._MatScalar() names onto the usual routines.
27    Otherwise ..._MatScalar() takes matrix elements in single precision and inserts them into
28    the single precision data structures, while MatSetValuesBlocked_MPISBAIJ() converts the
29    entries from PetscScalar to single precision and then calls ..._MatScalar() to store them.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
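/*
   Illustrative sketch (not part of the library source): under PETSC_USE_MAT_SINGLE the
   public routine receives PetscScalar (double precision) values and narrows them into a
   scratch MatScalar (single precision) buffer before forwarding to the ..._MatScalar()
   routine.  Roughly (the names vd, vs, k are hypothetical):

       PetscScalar vd[2] = {1.0, 2.0};              user-supplied double precision values
       MatScalar   vs[2];                           scratch single precision buffer
       PetscInt    k;
       for (k=0; k<2; k++) vs[k] = (MatScalar)vd[k];
       MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vs,addv);

   The actual scratch-buffer management (setvaluescopy, setvalueslen) is done in
   MatSetValues_MPISBAIJ() below.
*/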
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
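/*
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) inserts one scalar entry into the
   local diagonal-block matrix "a": it locates block column bcol = col/bs within block row
   brow = row/bs (binary search narrowed to a short linear scan over the stored block
   columns), then adds or overwrites the value at offset bs*cidx+ridx inside the bs x bs
   block.  If the block is not yet present and new nonzeros are allowed, the row is grown
   with MatSeqXAIJReallocateAIJ() and the later blocks are shifted up.  The ..._B_Private
   macro below does the same for the off-diagonal block matrix "b".
*/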
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by the user is ignored.
207 */
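/*
   Usage sketch (illustrative only, block size 1): inserting both (0,2) and (2,0) results
   in only the upper-triangular entry being stored,

       PetscInt    i0 = 0, i2 = 2;
       PetscScalar v  = 5.0;
       MatSetValues(mat,1,&i0,1,&i2,&v,INSERT_VALUES);      kept:    0 <= 2
       MatSetValues(mat,1,&i2,1,&i0,&v,INSERT_VALUES);      ignored: 2 >  0

   where mat is assumed to be a MATMPISBAIJ matrix.
*/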
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=baij->rstart_bs;
218   PetscInt       rend_orig=baij->rend_bs,cstart_orig=baij->cstart_bs;
219   PetscInt       cend_orig=baij->cend_bs,bs=mat->bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->M-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs) continue;    /* ignore lower triangular blocks */
256         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
257           col = in[j] - cstart_orig;          /* local col index */
258           brow = row/bs; bcol = col/bs;
259           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
260           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
261           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
262           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
263         } else if (in[j] < 0) continue;
264 #if defined(PETSC_USE_DEBUG)
265         else if (in[j] >= mat->N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->N-1);}
266 #endif
267         else {  /* off-diag entry (B) */
268           if (mat->was_assembled) {
269             if (!baij->colmap) {
270               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
271             }
272 #if defined (PETSC_USE_CTABLE)
273             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
274             col  = col - 1;
275 #else
276             col = baij->colmap[in[j]/bs] - 1;
277 #endif
278             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
279               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
280               col =  in[j];
281               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
282               B = baij->B;
283               b = (Mat_SeqBAIJ*)(B)->data;
284               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
285               ba=b->a;
286             } else col += in[j]%bs;
287           } else col = in[j];
288           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
289           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
290           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
291         }
292       }
293     } else {  /* off processor entry */
294       if (!baij->donotstash) {
295         n_loc = 0;
296         for (j=0; j<n; j++){
297           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
298           in_loc[n_loc] = in[j];
299           if (roworiented) {
300             v_loc[n_loc] = v[i*n+j];
301           } else {
302             v_loc[n_loc] = v[j*m+i];
303           }
304           n_loc++;
305         }
306         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
307       }
308     }
309   }
310 
311   if(!baij->donotstash){
312     ierr = PetscFree(in_loc);CHKERRQ(ierr);
313     ierr = PetscFree(v_loc);CHKERRQ(ierr);
314   }
315   PetscFunctionReturn(0);
316 }
317 
318 #undef __FUNCT__
319 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
320 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
321 {
322   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
323   const MatScalar *value;
324   MatScalar       *barray=baij->barray;
325   PetscTruth      roworiented = baij->roworiented;
326   PetscErrorCode  ierr;
327   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstart;
328   PetscInt        rend=baij->rend,cstart=baij->cstart,stepval;
329   PetscInt        cend=baij->cend,bs=mat->bs,bs2=baij->bs2;
330 
331   PetscFunctionBegin;
332   if(!barray) {
333     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
334     baij->barray = barray;
335   }
336 
337   if (roworiented) {
338     stepval = (n-1)*bs;
339   } else {
340     stepval = (m-1)*bs;
341   }
342   for (i=0; i<m; i++) {
343     if (im[i] < 0) continue;
344 #if defined(PETSC_USE_DEBUG)
345     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
346 #endif
347     if (im[i] >= rstart && im[i] < rend) {
348       row = im[i] - rstart;
349       for (j=0; j<n; j++) {
350         /* If there is only one block column (or one block row when column-oriented) the block is contiguous in v and no copy is required */
351         if ((roworiented) && (n == 1)) {
352           barray = (MatScalar*) v + i*bs2;
353         } else if((!roworiented) && (m == 1)) {
354           barray = (MatScalar*) v + j*bs2;
355         } else { /* Here a copy is required */
356           if (roworiented) {
357             value = v + i*(stepval+bs)*bs + j*bs;
358           } else {
359             value = v + j*(stepval+bs)*bs + i*bs;
360           }
361           for (ii=0; ii<bs; ii++,value+=stepval) {
362             for (jj=0; jj<bs; jj++) {
363               *barray++  = *value++;
364             }
365           }
366           barray -=bs2;
367         }
368 
369         if (in[j] >= cstart && in[j] < cend){
370           col  = in[j] - cstart;
371           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
372         }
373         else if (in[j] < 0) continue;
374 #if defined(PETSC_USE_DEBUG)
375         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
376 #endif
377         else {
378           if (mat->was_assembled) {
379             if (!baij->colmap) {
380               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
381             }
382 
383 #if defined(PETSC_USE_DEBUG)
384 #if defined (PETSC_USE_CTABLE)
385             { PetscInt data;
386               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
387               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
388             }
389 #else
390             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
391 #endif
392 #endif
393 #if defined (PETSC_USE_CTABLE)
394 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
395             col  = (col - 1)/bs;
396 #else
397             col = (baij->colmap[in[j]] - 1)/bs;
398 #endif
399             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
400               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
401               col =  in[j];
402             }
403           }
404           else col = in[j];
405           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
406         }
407       }
408     } else {
409       if (!baij->donotstash) {
410         if (roworiented) {
411           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
412         } else {
413           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
414         }
415       }
416     }
417   }
418   PetscFunctionReturn(0);
419 }
420 
421 #undef __FUNCT__
422 #define __FUNCT__ "MatGetValues_MPISBAIJ"
423 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
424 {
425   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
426   PetscErrorCode ierr;
427   PetscInt       bs=mat->bs,i,j,bsrstart = baij->rstart*bs,bsrend = baij->rend*bs;
428   PetscInt       bscstart = baij->cstart*bs,bscend = baij->cend*bs,row,col,data;
429 
430   PetscFunctionBegin;
431   for (i=0; i<m; i++) {
432     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
433     if (idxm[i] >= mat->M) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->M-1);
434     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
435       row = idxm[i] - bsrstart;
436       for (j=0; j<n; j++) {
437         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
438         if (idxn[j] >= mat->N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->N-1);
439         if (idxn[j] >= bscstart && idxn[j] < bscend){
440           col = idxn[j] - bscstart;
441           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
442         } else {
443           if (!baij->colmap) {
444             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
445           }
446 #if defined (PETSC_USE_CTABLE)
447           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
448           data --;
449 #else
450           data = baij->colmap[idxn[j]/bs]-1;
451 #endif
452           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
453           else {
454             col  = data + idxn[j]%bs;
455             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
456           }
457         }
458       }
459     } else {
460       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
461     }
462   }
463   PetscFunctionReturn(0);
464 }
465 
466 #undef __FUNCT__
467 #define __FUNCT__ "MatNorm_MPISBAIJ"
468 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
469 {
470   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
471   PetscErrorCode ierr;
472   PetscReal      sum[2],*lnorm2;
473 
474   PetscFunctionBegin;
475   if (baij->size == 1) {
476     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
477   } else {
478     if (type == NORM_FROBENIUS) {
479       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
480       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
481       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
482       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
483       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
484       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
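      /* norm = sqrt(||A_d||_F^2 + 2*||B||_F^2): each stored entry of the off-diagonal block
         matrix B stands for both a(i,j) and a(j,i) of the global symmetric matrix, hence the
         factor 2 on the second term */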
485       *norm = sqrt(sum[0] + 2*sum[1]);
486       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
487     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
488       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
489       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
490       PetscReal    *rsum,*rsum2,vabs;
491       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstart,nz;
492       PetscInt     brow,bcol,col,bs=baij->A->bs,row,grow,gcol,mbs=amat->mbs;
493       MatScalar    *v;
494 
495       ierr  = PetscMalloc((2*mat->N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
496       rsum2 = rsum + mat->N;
497       ierr  = PetscMemzero(rsum,mat->N*sizeof(PetscReal));CHKERRQ(ierr);
498       /* Amat */
499       v = amat->a; jj = amat->j;
500       for (brow=0; brow<mbs; brow++) {
501         grow = bs*(rstart + brow);
502         nz = amat->i[brow+1] - amat->i[brow];
503         for (bcol=0; bcol<nz; bcol++){
504           gcol = bs*(rstart + *jj); jj++;
505           for (col=0; col<bs; col++){
506             for (row=0; row<bs; row++){
507               vabs = PetscAbsScalar(*v); v++;
508               rsum[gcol+col] += vabs;
509               /* non-diagonal block */
510               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
511             }
512           }
513         }
514       }
515       /* Bmat */
516       v = bmat->a; jj = bmat->j;
517       for (brow=0; brow<mbs; brow++) {
518         grow = bs*(rstart + brow);
519         nz = bmat->i[brow+1] - bmat->i[brow];
520         for (bcol=0; bcol<nz; bcol++){
521           gcol = bs*garray[*jj]; jj++;
522           for (col=0; col<bs; col++){
523             for (row=0; row<bs; row++){
524               vabs = PetscAbsScalar(*v); v++;
525               rsum[gcol+col] += vabs;
526               rsum[grow+row] += vabs;
527             }
528           }
529         }
530       }
531       ierr = MPI_Allreduce(rsum,rsum2,mat->N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
532       *norm = 0.0;
533       for (col=0; col<mat->N; col++) {
534         if (rsum2[col] > *norm) *norm = rsum2[col];
535       }
536       ierr = PetscFree(rsum);CHKERRQ(ierr);
537     } else {
538       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
539     }
540   }
541   PetscFunctionReturn(0);
542 }
543 
544 #undef __FUNCT__
545 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
546 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
547 {
548   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
549   PetscErrorCode ierr;
550   PetscInt       nstash,reallocs;
551   InsertMode     addv;
552 
553   PetscFunctionBegin;
554   if (baij->donotstash) {
555     PetscFunctionReturn(0);
556   }
557 
558   /* make sure all processors are in either INSERT_VALUES or ADD_VALUES mode */
559   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
560   if (addv == (ADD_VALUES|INSERT_VALUES)) {
561     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
562   }
563   mat->insertmode = addv; /* in case this processor had no cache */
564 
565   ierr = MatStashScatterBegin_Private(&mat->stash,baij->rowners_bs);CHKERRQ(ierr);
566   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rowners);CHKERRQ(ierr);
567   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
568   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Stash has %D entries,uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
569   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
570   ierr = PetscLogInfo((0,"MatAssemblyBegin_MPISBAIJ:Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs));CHKERRQ(ierr);
571   PetscFunctionReturn(0);
572 }
573 
574 #undef __FUNCT__
575 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
576 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
577 {
578   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
579   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
580   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
581   PetscErrorCode ierr;
582   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
583   PetscInt       *row,*col,other_disassembled;
584   PetscMPIInt    n;
585   PetscTruth     r1,r2,r3;
586   MatScalar      *val;
587   InsertMode     addv = mat->insertmode;
588 
589   PetscFunctionBegin;
590 
591   if (!baij->donotstash) {
592     while (1) {
593       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
594       if (!flg) break;
595 
596       for (i=0; i<n;) {
597         /* Now identify the consecutive vals belonging to the same row */
598         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
599         if (j < n) ncols = j-i;
600         else       ncols = n-i;
601         /* Now assemble all these values with a single function call */
602         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
603         i = j;
604       }
605     }
606     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
607     /* Now process the block-stash. Since the values are stashed column-oriented,
608        set the roworiented flag to column oriented, and after MatSetValues()
609        restore the original flags */
610     r1 = baij->roworiented;
611     r2 = a->roworiented;
612     r3 = b->roworiented;
613     baij->roworiented = PETSC_FALSE;
614     a->roworiented    = PETSC_FALSE;
615     b->roworiented    = PETSC_FALSE;
616     while (1) {
617       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
618       if (!flg) break;
619 
620       for (i=0; i<n;) {
621         /* Now identify the consecutive vals belonging to the same row */
622         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
623         if (j < n) ncols = j-i;
624         else       ncols = n-i;
625         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
626         i = j;
627       }
628     }
629     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
630     baij->roworiented = r1;
631     a->roworiented    = r2;
632     b->roworiented    = r3;
633   }
634 
635   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
636   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
637 
638   /* determine if any processor has disassembled; if so we must
639      also disassemble ourselves, in order that we may reassemble. */
640   /*
641      if nonzero structure of submatrix B cannot change then we know that
642      no processor disassembled thus we can skip this stuff
643   */
644   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
645     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
646     if (mat->was_assembled && !other_disassembled) {
647       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
648     }
649   }
650 
651   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
652     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
653   }
654   b->compressedrow.use = PETSC_TRUE;
655   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
656   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
657 
658   if (baij->rowvalues) {
659     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
660     baij->rowvalues = 0;
661   }
662 
663   PetscFunctionReturn(0);
664 }
665 
666 #undef __FUNCT__
667 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
668 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
669 {
670   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
671   PetscErrorCode    ierr;
672   PetscInt          bs = mat->bs;
673   PetscMPIInt       size = baij->size,rank = baij->rank;
674   PetscTruth        iascii,isdraw;
675   PetscViewer       sviewer;
676   PetscViewerFormat format;
677 
678   PetscFunctionBegin;
679   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
680   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
681   if (iascii) {
682     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
683     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
684       MatInfo info;
685       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
686       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
687       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
688               rank,mat->m,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
689               mat->bs,(PetscInt)info.memory);CHKERRQ(ierr);
690       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
691       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
692       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
694       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
695       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
696       PetscFunctionReturn(0);
697     } else if (format == PETSC_VIEWER_ASCII_INFO) {
698       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
699       PetscFunctionReturn(0);
700     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
701       PetscFunctionReturn(0);
702     }
703   }
704 
705   if (isdraw) {
706     PetscDraw       draw;
707     PetscTruth isnull;
708     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
709     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
710   }
711 
712   if (size == 1) {
713     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
714     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
715   } else {
716     /* assemble the entire matrix onto first processor. */
717     Mat         A;
718     Mat_SeqSBAIJ *Aloc;
719     Mat_SeqBAIJ *Bloc;
720     PetscInt         M = mat->M,N = mat->N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
721     MatScalar   *a;
722 
723     /* Should this be the same type as mat? */
724     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
725     if (!rank) {
726       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
727     } else {
728       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
729     }
730     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
731     ierr = MatMPISBAIJSetPreallocation(A,mat->bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
732     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
733 
734     /* copy over the A part */
735     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
736     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
737     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
738 
739     for (i=0; i<mbs; i++) {
740       rvals[0] = bs*(baij->rstart + i);
741       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
742       for (j=ai[i]; j<ai[i+1]; j++) {
743         col = (baij->cstart+aj[j])*bs;
744         for (k=0; k<bs; k++) {
745           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
746           col++; a += bs;
747         }
748       }
749     }
750     /* copy over the B part */
751     Bloc = (Mat_SeqBAIJ*)baij->B->data;
752     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
753     for (i=0; i<mbs; i++) {
754       rvals[0] = bs*(baij->rstart + i);
755       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
756       for (j=ai[i]; j<ai[i+1]; j++) {
757         col = baij->garray[aj[j]]*bs;
758         for (k=0; k<bs; k++) {
759           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
760           col++; a += bs;
761         }
762       }
763     }
764     ierr = PetscFree(rvals);CHKERRQ(ierr);
765     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
766     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
767     /*
768        Everyone has to call to draw the matrix since the graphics waits are
769        synchronized across all processors that share the PetscDraw object
770     */
771     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
772     if (!rank) {
773       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
774       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
775     }
776     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
777     ierr = MatDestroy(A);CHKERRQ(ierr);
778   }
779   PetscFunctionReturn(0);
780 }
781 
782 #undef __FUNCT__
783 #define __FUNCT__ "MatView_MPISBAIJ"
784 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
785 {
786   PetscErrorCode ierr;
787   PetscTruth     iascii,isdraw,issocket,isbinary;
788 
789   PetscFunctionBegin;
790   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
791   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
792   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
793   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
794   if (iascii || isdraw || issocket || isbinary) {
795     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
796   } else {
797     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
798   }
799   PetscFunctionReturn(0);
800 }
801 
802 #undef __FUNCT__
803 #define __FUNCT__ "MatDestroy_MPISBAIJ"
804 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
805 {
806   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
807   PetscErrorCode ierr;
808 
809   PetscFunctionBegin;
810 #if defined(PETSC_USE_LOG)
811   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->M,mat->N);
812 #endif
813   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
814   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
815   ierr = PetscFree(baij->rowners);CHKERRQ(ierr);
816   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
817   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
818 #if defined (PETSC_USE_CTABLE)
819   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
820 #else
821   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
822 #endif
823   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
824   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
825   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
826   if (baij->slvec0) {
827     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
828     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
829   }
830   if (baij->slvec1) {
831     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
832     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
834   }
835   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
836   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
837   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
838   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
839 #if defined(PETSC_USE_MAT_SINGLE)
840   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
841 #endif
842   ierr = PetscFree(baij);CHKERRQ(ierr);
843 
844   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
845   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
846   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
847   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
848   PetscFunctionReturn(0);
849 }
850 
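/*
   MatMult_MPISBAIJ computes y = A*x for the symmetric storage format with a single
   scatter (a sketch of what the code below does, using the slvec work vectors):

       slvec1a = A_d * x_local                        diagonal block (SBAIJ multiply)
       slvec0  = [ x_local ; B^T * x_local ]          this process's contribution to remote rows
       scatter-add slvec0 into slvec1, giving
       slvec1  = [ A_d*x_local + remote B^T pieces ; x_ghost ]
       y       = B * x_ghost + slvec1a                superdiagonal part added last
*/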
851 #undef __FUNCT__
852 #define __FUNCT__ "MatMult_MPISBAIJ"
853 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
854 {
855   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
856   PetscErrorCode ierr;
857   PetscInt       nt,mbs=a->mbs,bs=A->bs;
858   PetscScalar    *x,*from,zero=0.0;
859 
860   PetscFunctionBegin;
861   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
862   if (nt != A->n) {
863     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
864   }
865   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
866   if (nt != A->m) {
867     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
868   }
869 
870   /* diagonal part */
871   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
872   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
873 
874   /* subdiagonal part */
875   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
876 
877   /* copy x into the vec slvec0 */
878   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
879   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
880   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
881   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
882 
883   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
884   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
885   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
886 
887   /* superdiagonal part */
888   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
889 
890   PetscFunctionReturn(0);
891 }
892 
893 #undef __FUNCT__
894 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
895 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
896 {
897   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
898   PetscErrorCode ierr;
899   PetscInt       nt;
900 
901   PetscFunctionBegin;
902   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
903   if (nt != A->n) {
904     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
905   }
906   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
907   if (nt != A->m) {
908     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
909   }
910 
911   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
912   /* do diagonal part */
913   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
914   /* do superdiagonal part */
915   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
916   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
917   /* do subdiagonal part */
918   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
919   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
920   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
921 
922   PetscFunctionReturn(0);
923 }
924 
925 #undef __FUNCT__
926 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
927 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
928 {
929   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
930   PetscErrorCode ierr;
931   PetscInt       mbs=a->mbs,bs=A->bs;
932   PetscScalar    *x,*from,zero=0.0;
933 
934   PetscFunctionBegin;
935   /*
936   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
937   PetscSynchronizedFlush(A->comm);
938   */
939   /* diagonal part */
940   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
941   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
942 
943   /* subdiagonal part */
944   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
945 
946   /* copy x into the vec slvec0 */
947   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
948   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
949   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
950   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
951 
952   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
953   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
954   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
955 
956   /* superdiagonal part */
957   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
958 
959   PetscFunctionReturn(0);
960 }
961 
962 #undef __FUNCT__
963 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
964 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
965 {
966   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
967   PetscErrorCode ierr;
968 
969   PetscFunctionBegin;
970   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
971   /* do diagonal part */
972   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
973   /* do superdiagonal part */
974   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
975   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
976 
977   /* do subdiagonal part */
978   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
979   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
980   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
981 
982   PetscFunctionReturn(0);
983 }
984 
985 /*
986   This only works correctly for square matrices where the subblock A->A is the
987    diagonal block
988 */
989 #undef __FUNCT__
990 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
991 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
992 {
993   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
994   PetscErrorCode ierr;
995 
996   PetscFunctionBegin;
997   /* if (a->M != a->N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
998   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
999   PetscFunctionReturn(0);
1000 }
1001 
1002 #undef __FUNCT__
1003 #define __FUNCT__ "MatScale_MPISBAIJ"
1004 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1005 {
1006   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1007   PetscErrorCode ierr;
1008 
1009   PetscFunctionBegin;
1010   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1011   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1012   PetscFunctionReturn(0);
1013 }
1014 
1015 #undef __FUNCT__
1016 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1017 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1018 {
1019   PetscFunctionBegin;
1020   if (matin) SETERRQ(PETSC_ERR_SUP,"MatGetRow is not supported for SBAIJ matrix format");
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1026 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1027 {
1028   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1029 
1030   PetscFunctionBegin;
1031   if (!baij->getrowactive) {
1032     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1033   }
1034   baij->getrowactive = PETSC_FALSE;
1035   PetscFunctionReturn(0);
1036 }
1037 
1038 #undef __FUNCT__
1039 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1040 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1041 {
1042   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1043   PetscErrorCode ierr;
1044 
1045   PetscFunctionBegin;
1046   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1047   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1048   PetscFunctionReturn(0);
1049 }
1050 
1051 #undef __FUNCT__
1052 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1053 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1054 {
1055   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1056   PetscErrorCode ierr;
1057 
1058   PetscFunctionBegin;
1059   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1060   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1061   PetscFunctionReturn(0);
1062 }
1063 
1064 #undef __FUNCT__
1065 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1066 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1067 {
1068   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1069   PetscErrorCode ierr;
1070 
1071   PetscFunctionBegin;
1072   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1073   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1074   PetscFunctionReturn(0);
1075 }
1076 
1077 #undef __FUNCT__
1078 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1079 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1080 {
1081   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1082   Mat            A = a->A,B = a->B;
1083   PetscErrorCode ierr;
1084   PetscReal      isend[5],irecv[5];
1085 
1086   PetscFunctionBegin;
1087   info->block_size     = (PetscReal)matin->bs;
1088   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1089   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1090   isend[3] = info->memory;  isend[4] = info->mallocs;
1091   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1092   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1093   isend[3] += info->memory;  isend[4] += info->mallocs;
1094   if (flag == MAT_LOCAL) {
1095     info->nz_used      = isend[0];
1096     info->nz_allocated = isend[1];
1097     info->nz_unneeded  = isend[2];
1098     info->memory       = isend[3];
1099     info->mallocs      = isend[4];
1100   } else if (flag == MAT_GLOBAL_MAX) {
1101     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1102     info->nz_used      = irecv[0];
1103     info->nz_allocated = irecv[1];
1104     info->nz_unneeded  = irecv[2];
1105     info->memory       = irecv[3];
1106     info->mallocs      = irecv[4];
1107   } else if (flag == MAT_GLOBAL_SUM) {
1108     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1109     info->nz_used      = irecv[0];
1110     info->nz_allocated = irecv[1];
1111     info->nz_unneeded  = irecv[2];
1112     info->memory       = irecv[3];
1113     info->mallocs      = irecv[4];
1114   } else {
1115     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1116   }
1117   info->rows_global       = (PetscReal)matin->M;
1118   info->columns_global    = (PetscReal)matin->N;
1119   info->rows_local        = (PetscReal)matin->m;
1120   info->columns_local     = (PetscReal)matin->N;
1121   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1122   info->fill_ratio_needed = 0;
1123   info->factor_mallocs    = 0;
1124   PetscFunctionReturn(0);
1125 }
1126 
1127 #undef __FUNCT__
1128 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1129 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1130 {
1131   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1132   PetscErrorCode ierr;
1133 
1134   PetscFunctionBegin;
1135   switch (op) {
1136   case MAT_NO_NEW_NONZERO_LOCATIONS:
1137   case MAT_YES_NEW_NONZERO_LOCATIONS:
1138   case MAT_COLUMNS_UNSORTED:
1139   case MAT_COLUMNS_SORTED:
1140   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1141   case MAT_KEEP_ZEROED_ROWS:
1142   case MAT_NEW_NONZERO_LOCATION_ERR:
1143     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1144     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1145     break;
1146   case MAT_ROW_ORIENTED:
1147     a->roworiented = PETSC_TRUE;
1148     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1149     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1150     break;
1151   case MAT_ROWS_SORTED:
1152   case MAT_ROWS_UNSORTED:
1153   case MAT_YES_NEW_DIAGONALS:
1154     ierr = PetscLogInfo((A,"Info:MatSetOption_MPISBAIJ:Option ignored\n"));CHKERRQ(ierr);
1155     break;
1156   case MAT_COLUMN_ORIENTED:
1157     a->roworiented = PETSC_FALSE;
1158     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1159     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1160     break;
1161   case MAT_IGNORE_OFF_PROC_ENTRIES:
1162     a->donotstash = PETSC_TRUE;
1163     break;
1164   case MAT_NO_NEW_DIAGONALS:
1165     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1166   case MAT_USE_HASH_TABLE:
1167     a->ht_flag = PETSC_TRUE;
1168     break;
1169   case MAT_NOT_SYMMETRIC:
1170   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1171   case MAT_HERMITIAN:
1172     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1173   case MAT_SYMMETRIC:
1174   case MAT_STRUCTURALLY_SYMMETRIC:
1175   case MAT_NOT_HERMITIAN:
1176   case MAT_SYMMETRY_ETERNAL:
1177   case MAT_NOT_SYMMETRY_ETERNAL:
1178     break;
1179   default:
1180     SETERRQ(PETSC_ERR_SUP,"unknown option");
1181   }
1182   PetscFunctionReturn(0);
1183 }
1184 
1185 #undef __FUNCT__
1186 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1187 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1188 {
1189   PetscErrorCode ierr;
1190   PetscFunctionBegin;
1191   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1192   PetscFunctionReturn(0);
1193 }
1194 
1195 #undef __FUNCT__
1196 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1197 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1198 {
1199   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1200   Mat            a=baij->A, b=baij->B;
1201   PetscErrorCode ierr;
1202   PetscInt       nv,m,n;
1203   PetscTruth     flg;
1204 
1205   PetscFunctionBegin;
1206   if (ll != rr){
1207     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1208     if (!flg)
1209       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1210   }
1211   if (!ll) PetscFunctionReturn(0);
1212 
1213   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1214   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D %D must be the same",m,n);
1215 
1216   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1217   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1218 
1219   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1220 
1221   /* left diagonalscale the off-diagonal part */
1222   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1223 
1224   /* scale the diagonal part */
1225   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1226 
1227   /* right diagonalscale the off-diagonal part */
1228   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1229   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1235 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1236 {
1237   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1238   MPI_Comm          comm = A->comm;
1239   static PetscTruth called = PETSC_FALSE;
1240   PetscErrorCode    ierr;
1241 
1242   PetscFunctionBegin;
1243   if (!a->rank) {
1244     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1245   }
1246   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1247   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1248   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1249   PetscFunctionReturn(0);
1250 }
1251 
1252 #undef __FUNCT__
1253 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1254 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1255 {
1256   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1257   PetscErrorCode ierr;
1258 
1259   PetscFunctionBegin;
1260   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1261   PetscFunctionReturn(0);
1262 }
1263 
1264 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1265 
1266 #undef __FUNCT__
1267 #define __FUNCT__ "MatEqual_MPISBAIJ"
1268 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1269 {
1270   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1271   Mat            a,b,c,d;
1272   PetscTruth     flg;
1273   PetscErrorCode ierr;
1274 
1275   PetscFunctionBegin;
1276   a = matA->A; b = matA->B;
1277   c = matB->A; d = matB->B;
1278 
1279   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1280   if (flg) {
1281     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1282   }
1283   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1284   PetscFunctionReturn(0);
1285 }
1286 
1287 #undef __FUNCT__
1288 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1289 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1290 {
1291   PetscErrorCode ierr;
1292 
1293   PetscFunctionBegin;
1294   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1295   PetscFunctionReturn(0);
1296 }
1297 
1298 #undef __FUNCT__
1299 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1300 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1301 {
1302   PetscErrorCode ierr;
1303   PetscInt       i;
1304   PetscTruth     flg;
1305 
1306   PetscFunctionBegin;
1307   for (i=0; i<n; i++) {
1308     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1309     if (!flg) {
1310       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1311     }
1312   }
1313   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1314   PetscFunctionReturn(0);
1315 }
1316 
1317 
1318 /* -------------------------------------------------------------------*/
1319 static struct _MatOps MatOps_Values = {
1320        MatSetValues_MPISBAIJ,
1321        MatGetRow_MPISBAIJ,
1322        MatRestoreRow_MPISBAIJ,
1323        MatMult_MPISBAIJ,
1324 /* 4*/ MatMultAdd_MPISBAIJ,
1325        MatMult_MPISBAIJ,       /* matrix is symmetric, so transpose versions equal the non-transpose ones */
1326        MatMultAdd_MPISBAIJ,
1327        0,
1328        0,
1329        0,
1330 /*10*/ 0,
1331        0,
1332        0,
1333        MatRelax_MPISBAIJ,
1334        MatTranspose_MPISBAIJ,
1335 /*15*/ MatGetInfo_MPISBAIJ,
1336        MatEqual_MPISBAIJ,
1337        MatGetDiagonal_MPISBAIJ,
1338        MatDiagonalScale_MPISBAIJ,
1339        MatNorm_MPISBAIJ,
1340 /*20*/ MatAssemblyBegin_MPISBAIJ,
1341        MatAssemblyEnd_MPISBAIJ,
1342        0,
1343        MatSetOption_MPISBAIJ,
1344        MatZeroEntries_MPISBAIJ,
1345 /*25*/ 0,
1346        0,
1347        0,
1348        0,
1349        0,
1350 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1351        0,
1352        0,
1353        0,
1354        0,
1355 /*35*/ MatDuplicate_MPISBAIJ,
1356        0,
1357        0,
1358        0,
1359        0,
1360 /*40*/ 0,
1361        MatGetSubMatrices_MPISBAIJ,
1362        MatIncreaseOverlap_MPISBAIJ,
1363        MatGetValues_MPISBAIJ,
1364        0,
1365 /*45*/ MatPrintHelp_MPISBAIJ,
1366        MatScale_MPISBAIJ,
1367        0,
1368        0,
1369        0,
1370 /*50*/ 0,
1371        0,
1372        0,
1373        0,
1374        0,
1375 /*55*/ 0,
1376        0,
1377        MatSetUnfactored_MPISBAIJ,
1378        0,
1379        MatSetValuesBlocked_MPISBAIJ,
1380 /*60*/ 0,
1381        0,
1382        0,
1383        MatGetPetscMaps_Petsc,
1384        0,
1385 /*65*/ 0,
1386        0,
1387        0,
1388        0,
1389        0,
1390 /*70*/ MatGetRowMax_MPISBAIJ,
1391        0,
1392        0,
1393        0,
1394        0,
1395 /*75*/ 0,
1396        0,
1397        0,
1398        0,
1399        0,
1400 /*80*/ 0,
1401        0,
1402        0,
1403        0,
1404        MatLoad_MPISBAIJ,
1405 /*85*/ 0,
1406        0,
1407        0,
1408        0,
1409        0,
1410 /*90*/ 0,
1411        0,
1412        0,
1413        0,
1414        0,
1415 /*95*/ 0,
1416        0,
1417        0,
1418        0,
1419        0,
1420 /*100*/0,
1421        0,
1422        0,
1423        0,
1424        0,
1425 /*105*/0,
1426        MatRealPart_MPISBAIJ,
1427        MatImaginaryPart_MPISBAIJ
1428 };
1429 
1430 
1431 EXTERN_C_BEGIN
1432 #undef __FUNCT__
1433 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1434 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1435 {
1436   PetscFunctionBegin;
1437   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1438   *iscopy = PETSC_FALSE;
1439   PetscFunctionReturn(0);
1440 }
1441 EXTERN_C_END
1442 
1443 EXTERN_C_BEGIN
1444 #undef __FUNCT__
1445 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1446 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1447 {
1448   Mat_MPISBAIJ   *b;
1449   PetscErrorCode ierr;
1450   PetscInt       i,mbs,Mbs;
1451 
1452   PetscFunctionBegin;
1453   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1454 
1455   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1456   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1457   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1458   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1459   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1460   if (d_nnz) {
1461     for (i=0; i<B->m/bs; i++) {
1462       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1463     }
1464   }
1465   if (o_nnz) {
1466     for (i=0; i<B->m/bs; i++) {
1467       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1468     }
1469   }
1470   B->preallocated = PETSC_TRUE;
1471   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->m,&B->M);CHKERRQ(ierr);
1472   ierr = PetscSplitOwnershipBlock(B->comm,bs,&B->n,&B->N);CHKERRQ(ierr);
1473   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->rmap);CHKERRQ(ierr);
1474   ierr = PetscMapCreateMPI(B->comm,B->m,B->M,&B->cmap);CHKERRQ(ierr);
1475 
1476   b   = (Mat_MPISBAIJ*)B->data;
1477   mbs = B->m/bs;
1478   Mbs = B->M/bs;
1479   if (mbs*bs != B->m) {
1480     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->m,bs);
1481   }
1482 
1483   B->bs  = bs;
1484   b->bs2 = bs*bs;
1485   b->mbs = mbs;
1486   b->nbs = mbs;
1487   b->Mbs = Mbs;
1488   b->Nbs = Mbs;
1489 
1490   ierr = MPI_Allgather(&b->mbs,1,MPIU_INT,b->rowners+1,1,MPIU_INT,B->comm);CHKERRQ(ierr);
1491   b->rowners[0]    = 0;
1492   for (i=2; i<=b->size; i++) {
1493     b->rowners[i] += b->rowners[i-1];
1494   }
1495   b->rstart    = b->rowners[b->rank];
1496   b->rend      = b->rowners[b->rank+1];
1497   b->cstart    = b->rstart;
1498   b->cend      = b->rend;
1499   for (i=0; i<=b->size; i++) {
1500     b->rowners_bs[i] = b->rowners[i]*bs;
1501   }
1502   b->rstart_bs = b->rstart*bs;
1503   b->rend_bs   = b->rend*bs;
1504 
1505   b->cstart_bs = b->cstart*bs;
1506   b->cend_bs   = b->cend*bs;
1507 
1508   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1509   ierr = MatSetSizes(b->A,B->m,B->m,B->m,B->m);CHKERRQ(ierr);
1510   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1511   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1512   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1513 
1514   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1515   ierr = MatSetSizes(b->B,B->m,B->M,B->m,B->M);CHKERRQ(ierr);
1516   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1517   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1518   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1519 
1520   /* build cache for off-process entries generated during assembly */
1521   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1522 
1523   PetscFunctionReturn(0);
1524 }
1525 EXTERN_C_END
1526 
1527 /*MC
1528    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1529    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1530 
1531    Options Database Keys:
1532 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1533 
1534   Level: beginner
1535 
1536 .seealso: MatCreateMPISBAIJ
1537 M*/
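/*
   Illustrative sketch (not part of the library source): one way an application
   might create an "mpisbaij" matrix.  The communicator, sizes, block size and
   preallocation values below are hypothetical placeholders.
*/
#if 0
   Mat A;
   ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
   ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
   ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);                    /* or -mat_type mpisbaij via MatSetFromOptions() */
   ierr = MatMPISBAIJSetPreallocation(A,2,3,PETSC_NULL,1,PETSC_NULL);CHKERRQ(ierr);
#endif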
1538 
1539 EXTERN_C_BEGIN
1540 #undef __FUNCT__
1541 #define __FUNCT__ "MatCreate_MPISBAIJ"
1542 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1543 {
1544   Mat_MPISBAIJ   *b;
1545   PetscErrorCode ierr;
1546   PetscTruth     flg;
1547 
1548   PetscFunctionBegin;
1549 
1550   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1551   B->data = (void*)b;
1552   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1553 
1554   B->ops->destroy    = MatDestroy_MPISBAIJ;
1555   B->ops->view       = MatView_MPISBAIJ;
1556   B->mapping    = 0;
1557   B->factor     = 0;
1558   B->assembled  = PETSC_FALSE;
1559 
1560   B->insertmode = NOT_SET_VALUES;
1561   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1562   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1563 
1564   /* build local table of row and column ownerships */
1565   ierr          = PetscMalloc(3*(b->size+2)*sizeof(PetscInt),&b->rowners);CHKERRQ(ierr);
1566   b->cowners    = b->rowners + b->size + 2;
1567   b->rowners_bs = b->cowners + b->size + 2;
1568   ierr = PetscLogObjectMemory(B,3*(b->size+2)*sizeof(PetscInt)+sizeof(struct _p_Mat)+sizeof(Mat_MPISBAIJ));CHKERRQ(ierr);
1569 
1570   /* build cache for off-process entries generated during assembly */
1571   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1572   b->donotstash  = PETSC_FALSE;
1573   b->colmap      = PETSC_NULL;
1574   b->garray      = PETSC_NULL;
1575   b->roworiented = PETSC_TRUE;
1576 
1577 #if defined(PETSC_USE_MAT_SINGLE)
1578   /* stuff for MatSetValues_XXX in single precision */
1579   b->setvalueslen     = 0;
1580   b->setvaluescopy    = PETSC_NULL;
1581 #endif
1582 
1583   /* stuff used in block assembly */
1584   b->barray       = 0;
1585 
1586   /* stuff used for matrix vector multiply */
1587   b->lvec         = 0;
1588   b->Mvctx        = 0;
1589   b->slvec0       = 0;
1590   b->slvec0b      = 0;
1591   b->slvec1       = 0;
1592   b->slvec1a      = 0;
1593   b->slvec1b      = 0;
1594   b->sMvctx       = 0;
1595 
1596   /* stuff for MatGetRow() */
1597   b->rowindices   = 0;
1598   b->rowvalues    = 0;
1599   b->getrowactive = PETSC_FALSE;
1600 
1601   /* hash table stuff */
1602   b->ht           = 0;
1603   b->hd           = 0;
1604   b->ht_size      = 0;
1605   b->ht_flag      = PETSC_FALSE;
1606   b->ht_fact      = 0;
1607   b->ht_total_ct  = 0;
1608   b->ht_insert_ct = 0;
1609 
1610   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1611   if (flg) {
1612     PetscReal fact = 1.39;
1613     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1614     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1615     if (fact <= 1.0) fact = 1.39;
1616     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1617     ierr = PetscLogInfo((0,"MatCreate_MPISBAIJ:Hash table factor used %5.2f\n",fact));CHKERRQ(ierr);
1618   }
1619   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1620                                      "MatStoreValues_MPISBAIJ",
1621                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1622   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1623                                      "MatRetrieveValues_MPISBAIJ",
1624                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1625   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1626                                      "MatGetDiagonalBlock_MPISBAIJ",
1627                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1628   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1629                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1630                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1631   B->symmetric                  = PETSC_TRUE;
1632   B->structurally_symmetric     = PETSC_TRUE;
1633   B->symmetric_set              = PETSC_TRUE;
1634   B->structurally_symmetric_set = PETSC_TRUE;
1635   PetscFunctionReturn(0);
1636 }
1637 EXTERN_C_END
1638 
1639 /*MC
1640    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1641 
1642    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1643    and MATMPISBAIJ otherwise.
1644 
1645    Options Database Keys:
1646 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1647 
1648   Level: beginner
1649 
1650 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1651 M*/
1652 
1653 EXTERN_C_BEGIN
1654 #undef __FUNCT__
1655 #define __FUNCT__ "MatCreate_SBAIJ"
1656 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1657 {
1658   PetscErrorCode ierr;
1659   PetscMPIInt    size;
1660 
1661   PetscFunctionBegin;
1662   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1663   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1664   if (size == 1) {
1665     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1666   } else {
1667     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1668   }
1669   PetscFunctionReturn(0);
1670 }
1671 EXTERN_C_END
1672 
1673 #undef __FUNCT__
1674 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1675 /*@C
1676    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1677    the user should preallocate the matrix storage by setting the parameters
1678    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1679    performance can be increased by more than a factor of 50.
1680 
1681    Collective on Mat
1682 
1683    Input Parameters:
1684 +  A - the matrix
1685 .  bs   - size of block
1686 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1687            submatrix  (same for all local rows)
1688 .  d_nnz - array containing the number of block nonzeros in the various block rows
1689            in the upper triangular and diagonal part of the diagonal portion of the local
1690            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1691            for the diagonal entry even if it is zero.
1692 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1693            submatrix (same for all local rows).
1694 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1695            off-diagonal portion of the local submatrix (possibly different for
1696            each block row) or PETSC_NULL.
1697 
1698 
1699    Options Database Keys:
1700 .   -mat_no_unroll - uses code that does not unroll the loops in the
1701                      block calculations (much slower)
1702 .   -mat_block_size - size of the blocks to use
1703 
1704    Notes:
1705 
1706    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1707    then it must be used on all processors that share the object for that argument.
1708 
1709    If the *_nnz parameter is given then the *_nz parameter is ignored
1710 
1711    Storage Information:
1712    For a square global matrix we define each processor's diagonal portion
1713    to be its local rows and the corresponding columns (a square submatrix);
1714    each processor's off-diagonal portion encompasses the remainder of the
1715    local matrix (a rectangular submatrix).
1716 
1717    The user can specify preallocated storage for the diagonal part of
1718    the local submatrix with either d_nz or d_nnz (not both).  Set
1719    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1720    memory allocation.  Likewise, specify preallocated storage for the
1721    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1722 
1723    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1724    the figure below we depict these three local rows and all columns (0-11).
1725 
1726 .vb
1727            0 1 2 3 4 5 6 7 8 9 10 11
1728           -------------------
1729    row 3  |  o o o d d d o o o o o o
1730    row 4  |  o o o d d d o o o o o o
1731    row 5  |  o o o d d d o o o o o o
1732           -------------------
1733 .ve
1734 
1735    Thus, any entries in the d locations are stored in the d (diagonal)
1736    submatrix, and any entries in the o locations are stored in the
1737    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1738    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1739 
1740    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1741    plus the diagonal part of the d matrix,
1742    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1743    In general, for PDE problems in which most nonzeros are near the diagonal,
1744    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1745    or you will get TERRIBLE performance; see the users' manual chapter on
1746    matrices.
1747 
1748    Level: intermediate
1749 
1750 .keywords: matrix, block, aij, compressed row, sparse, parallel
1751 
1752 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1753 @*/
1754 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1755 {
1756   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1757 
1758   PetscFunctionBegin;
1759   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1760   if (f) {
1761     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1762   }
1763   PetscFunctionReturn(0);
1764 }
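/*
   Worked sketch for the 3-row example in the manual page above, assuming bs = 1,
   that every marked position in the upper triangle is a nonzero, and that for this
   symmetric format only the diagonal block and the columns to its right are counted.
   The matrix name B below is a placeholder.
*/
#if 0
   PetscInt d_nnz[3] = {3,2,1};   /* row 3: cols 3,4,5; row 4: cols 4,5; row 5: col 5 */
   PetscInt o_nnz[3] = {6,6,6};   /* cols 6..11 for each of the three local rows */
   ierr = MatMPISBAIJSetPreallocation(B,1,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
#endif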
1765 
1766 #undef __FUNCT__
1767 #define __FUNCT__ "MatCreateMPISBAIJ"
1768 /*@C
1769    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1770    (block compressed row).  For good matrix assembly performance
1771    the user should preallocate the matrix storage by setting the parameters
1772    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1773    performance can be increased by more than a factor of 50.
1774 
1775    Collective on MPI_Comm
1776 
1777    Input Parameters:
1778 +  comm - MPI communicator
1779 .  bs   - size of block
1780 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1781            This value should be the same as the local size used in creating the
1782            y vector for the matrix-vector product y = Ax.
1783 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1784            This value should be the same as the local size used in creating the
1785            x vector for the matrix-vector product y = Ax.
1786 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1787 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1788 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1789            submatrix  (same for all local rows)
1790 .  d_nnz - array containing the number of block nonzeros in the various block rows
1791            in the upper triangular portion of the diagonal portion of the local
1792            submatrix (possibly different for each block row) or PETSC_NULL.
1793            You must leave room for the diagonal entry even if it is zero.
1794 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1795            submatrix (same for all local rows).
1796 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1797            off-diagonal portion of the local submatrix (possibly different for
1798            each block row) or PETSC_NULL.
1799 
1800    Output Parameter:
1801 .  A - the matrix
1802 
1803    Options Database Keys:
1804 .   -mat_no_unroll - uses code that does not unroll the loops in the
1805                      block calculations (much slower)
1806 .   -mat_block_size - size of the blocks to use
1807 .   -mat_mpi - use the parallel matrix data structures even on one processor
1808                (defaults to using SeqSBAIJ format on one processor)
1809 
1810    Notes:
1811    The number of rows and columns must be divisible by blocksize.
1812 
1813    The user MUST specify either the local or global matrix dimensions
1814    (possibly both).
1815 
1816    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1817    then it must be used on all processors that share the object for that argument.
1818 
1819    If the *_nnz parameter is given then the *_nz parameter is ignored
1820 
1821    Storage Information:
1822    For a square global matrix we define each processor's diagonal portion
1823    to be its local rows and the corresponding columns (a square submatrix);
1824    each processor's off-diagonal portion encompasses the remainder of the
1825    local matrix (a rectangular submatrix).
1826 
1827    The user can specify preallocated storage for the diagonal part of
1828    the local submatrix with either d_nz or d_nnz (not both).  Set
1829    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1830    memory allocation.  Likewise, specify preallocated storage for the
1831    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1832 
1833    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1834    the figure below we depict these three local rows and all columns (0-11).
1835 
1836 .vb
1837            0 1 2 3 4 5 6 7 8 9 10 11
1838           -------------------
1839    row 3  |  o o o d d d o o o o o o
1840    row 4  |  o o o d d d o o o o o o
1841    row 5  |  o o o d d d o o o o o o
1842           -------------------
1843 .ve
1844 
1845    Thus, any entries in the d locations are stored in the d (diagonal)
1846    submatrix, and any entries in the o locations are stored in the
1847    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1848    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1849 
1850    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1851    plus the diagonal part of the d matrix,
1852    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1853    In general, for PDE problems in which most nonzeros are near the diagonal,
1854    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1855    or you will get TERRIBLE performance; see the users' manual chapter on
1856    matrices.
1857 
1858    Level: intermediate
1859 
1860 .keywords: matrix, block, aij, compressed row, sparse, parallel
1861 
1862 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1863 @*/
1864 
1865 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
1866 {
1867   PetscErrorCode ierr;
1868   PetscMPIInt    size;
1869 
1870   PetscFunctionBegin;
1871   ierr = MatCreate(comm,A);CHKERRQ(ierr);
1872   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
1873   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
1874   if (size > 1) {
1875     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
1876     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1877   } else {
1878     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
1879     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1880   }
1881   PetscFunctionReturn(0);
1882 }
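/*
   Illustrative sketch (hypothetical sizes): a 12x12 matrix like the one in the
   figure above created in a single call, letting PETSc choose the local sizes
   and using uniform per-row preallocation.
*/
#if 0
   Mat A;
   ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,1,PETSC_DECIDE,PETSC_DECIDE,12,12,
                            3,PETSC_NULL,6,PETSC_NULL,&A);CHKERRQ(ierr);
#endif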
1883 
1884 
1885 #undef __FUNCT__
1886 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
1887 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
1888 {
1889   Mat            mat;
1890   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
1891   PetscErrorCode ierr;
1892   PetscInt       len=0,nt,bs=matin->bs,mbs=oldmat->mbs;
1893   PetscScalar    *array;
1894 
1895   PetscFunctionBegin;
1896   *newmat       = 0;
1897   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
1898   ierr = MatSetSizes(mat,matin->m,matin->n,matin->M,matin->N);CHKERRQ(ierr);
1899   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
1900   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
1901 
1902   mat->factor       = matin->factor;
1903   mat->preallocated = PETSC_TRUE;
1904   mat->assembled    = PETSC_TRUE;
1905   mat->insertmode   = NOT_SET_VALUES;
1906 
1907   a = (Mat_MPISBAIJ*)mat->data;
1908   mat->bs  = matin->bs;
1909   a->bs2   = oldmat->bs2;
1910   a->mbs   = oldmat->mbs;
1911   a->nbs   = oldmat->nbs;
1912   a->Mbs   = oldmat->Mbs;
1913   a->Nbs   = oldmat->Nbs;
1914 
1915   a->rstart       = oldmat->rstart;
1916   a->rend         = oldmat->rend;
1917   a->cstart       = oldmat->cstart;
1918   a->cend         = oldmat->cend;
1919   a->size         = oldmat->size;
1920   a->rank         = oldmat->rank;
1921   a->donotstash   = oldmat->donotstash;
1922   a->roworiented  = oldmat->roworiented;
1923   a->rowindices   = 0;
1924   a->rowvalues    = 0;
1925   a->getrowactive = PETSC_FALSE;
1926   a->barray       = 0;
1927   a->rstart_bs    = oldmat->rstart_bs;
1928   a->rend_bs      = oldmat->rend_bs;
1929   a->cstart_bs    = oldmat->cstart_bs;
1930   a->cend_bs      = oldmat->cend_bs;
1931 
1932   /* hash table stuff */
1933   a->ht           = 0;
1934   a->hd           = 0;
1935   a->ht_size      = 0;
1936   a->ht_flag      = oldmat->ht_flag;
1937   a->ht_fact      = oldmat->ht_fact;
1938   a->ht_total_ct  = 0;
1939   a->ht_insert_ct = 0;
1940 
1941   ierr = PetscMemcpy(a->rowners,oldmat->rowners,3*(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
1942   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
1943   ierr = MatStashCreate_Private(matin->comm,matin->bs,&mat->bstash);CHKERRQ(ierr);
1944   if (oldmat->colmap) {
1945 #if defined (PETSC_USE_CTABLE)
1946     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
1947 #else
1948     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
1949     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1950     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
1951 #endif
1952   } else a->colmap = 0;
1953 
1954   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
1955     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
1956     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
1957     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
1958   } else a->garray = 0;
1959 
1960   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
1961   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
1962   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
1963   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
1964 
1965   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
1966   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1967   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
1968   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1969 
1970   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
1971   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
1972   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
1973   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
1974   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
1975   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
1976   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
1977   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
1978   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
1979   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
1980   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
1981   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
1982   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
1983 
1984   /* VecScatterCopy(oldmat->sMvctx,&a->sMvctx) is not written yet, so simply take a reference to the original scatter instead: */
1985   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
1986   a->sMvctx = oldmat->sMvctx;
1987   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
1988 
1989   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
1990   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
1991   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
1992   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
1993   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
1994   *newmat = mat;
1995   PetscFunctionReturn(0);
1996 }
1997 
1998 #include "petscsys.h"
1999 
2000 #undef __FUNCT__
2001 #define __FUNCT__ "MatLoad_MPISBAIJ"
2002 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2003 {
2004   Mat            A;
2005   PetscErrorCode ierr;
2006   PetscInt       i,nz,j,rstart,rend;
2007   PetscScalar    *vals,*buf;
2008   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2009   MPI_Status     status;
2010   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2011   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2012   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2013   PetscInt       bs=1,Mbs,mbs,extra_rows;
2014   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2015   PetscInt       dcount,kmax,k,nzcount,tmp;
2016   int            fd;
2017 
2018   PetscFunctionBegin;
2019   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2020 
2021   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2022   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2023   if (!rank) {
2024     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2025     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2026     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2027     if (header[3] < 0) {
2028       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2029     }
2030   }
2031 
2032   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2033   M = header[1]; N = header[2];
2034 
2035   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2036 
2037   /*
2038      This code adds extra rows to make sure the number of rows is
2039      divisible by the blocksize
2040   */
2041   Mbs        = M/bs;
2042   extra_rows = bs - M + bs*(Mbs);
2043   if (extra_rows == bs) extra_rows = 0;
2044   else                  Mbs++;
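  /* e.g. with M = 10 and bs = 4: Mbs = 10/4 = 2 and extra_rows = 4 - 10 + 8 = 2 (!= bs),
     so Mbs becomes 3 and two padding rows with a unit diagonal are appended below */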
2045   if (extra_rows && !rank) {
2046     ierr = PetscLogInfo((0,"MatLoad_MPISBAIJ:Padding loaded matrix to match blocksize\n"));CHKERRQ(ierr);
2047   }
2048 
2049   /* determine ownership of all rows */
2050   mbs        = Mbs/size + ((Mbs % size) > rank);
2051   m          = mbs*bs;
2052   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2053   browners   = rowners + size + 1;
2054   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2055   rowners[0] = 0;
2056   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2057   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2058   rstart = rowners[rank];
2059   rend   = rowners[rank+1];
2060 
2061   /* distribute row lengths to all processors */
2062   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2063   if (!rank) {
2064     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2065     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2066     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2067     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2068     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2069     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2070     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2071   } else {
2072     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2073   }
2074 
2075   if (!rank) {   /* procs[0] */
2076     /* calculate the number of nonzeros on each processor */
2077     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2078     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2079     for (i=0; i<size; i++) {
2080       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2081         procsnz[i] += rowlengths[j];
2082       }
2083     }
2084     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2085 
2086     /* determine max buffer needed and allocate it */
2087     maxnz = 0;
2088     for (i=0; i<size; i++) {
2089       maxnz = PetscMax(maxnz,procsnz[i]);
2090     }
2091     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2092 
2093     /* read in my part of the matrix column indices  */
2094     nz     = procsnz[0];
2095     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2096     mycols = ibuf;
2097     if (size == 1)  nz -= extra_rows;
2098     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2099     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2100 
2101     /* read in the other processors' column indices (except the last one's) and ship them off */
2102     for (i=1; i<size-1; i++) {
2103       nz   = procsnz[i];
2104       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2105       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2106     }
2107     /* read in the last proc's column indices (padding the extra rows) */
2108     if (size != 1) {
2109       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2110       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2111       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2112       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2113     }
2114     ierr = PetscFree(cols);CHKERRQ(ierr);
2115   } else {  /* procs[i], i>0 */
2116     /* determine buffer space needed for message */
2117     nz = 0;
2118     for (i=0; i<m; i++) {
2119       nz += locrowlens[i];
2120     }
2121     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2122     mycols = ibuf;
2123     /* receive message of column indices*/
2124     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2125     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2126     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2127   }
2128 
2129   /* loop over local rows, determining number of off diagonal entries */
2130   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2131   odlens   = dlens + (rend-rstart);
2132   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2133   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2134   masked1  = mask    + Mbs;
2135   masked2  = masked1 + Mbs;
2136   rowcount = 0; nzcount = 0;
2137   for (i=0; i<mbs; i++) {
2138     dcount  = 0;
2139     odcount = 0;
2140     for (j=0; j<bs; j++) {
2141       kmax = locrowlens[rowcount];
2142       for (k=0; k<kmax; k++) {
2143         tmp = mycols[nzcount++]/bs; /* block col. index */
2144         if (!mask[tmp]) {
2145           mask[tmp] = 1;
2146           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2147           else masked1[dcount++] = tmp; /* entry in diag portion */
2148         }
2149       }
2150       rowcount++;
2151     }
2152 
2153     dlens[i]  = dcount;  /* d_nzz[i] */
2154     odlens[i] = odcount; /* o_nzz[i] */
2155 
2156     /* zero out the mask elements we set */
2157     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2158     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2159   }
2160 
2161   /* create our matrix */
2162   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2163   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2164   ierr = MatSetType(A,type);CHKERRQ(ierr);
2165   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2166   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2167 
2168   if (!rank) {
2169     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2170     /* read in my part of the matrix numerical values  */
2171     nz = procsnz[0];
2172     vals = buf;
2173     mycols = ibuf;
2174     if (size == 1)  nz -= extra_rows;
2175     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2176     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2177 
2178     /* insert into matrix */
2179     jj      = rstart*bs;
2180     for (i=0; i<m; i++) {
2181       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2182       mycols += locrowlens[i];
2183       vals   += locrowlens[i];
2184       jj++;
2185     }
2186 
2187     /* read in the other processors' values (except the last one's) and ship them out */
2188     for (i=1; i<size-1; i++) {
2189       nz   = procsnz[i];
2190       vals = buf;
2191       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2192       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2193     }
2194     /* the last proc */
2195     if (size != 1){
2196       nz   = procsnz[size-1] - extra_rows;  /* i == size-1 after the loop above */
2197       vals = buf;
2198       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2199       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2200       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2201     }
2202     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2203 
2204   } else {
2205     /* receive numeric values */
2206     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2207 
2208     /* receive message of values*/
2209     vals   = buf;
2210     mycols = ibuf;
2211     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2212     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2213     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2214 
2215     /* insert into matrix */
2216     jj      = rstart*bs;
2217     for (i=0; i<m; i++) {
2218       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2219       mycols += locrowlens[i];
2220       vals   += locrowlens[i];
2221       jj++;
2222     }
2223   }
2224 
2225   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2226   ierr = PetscFree(buf);CHKERRQ(ierr);
2227   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2228   ierr = PetscFree(rowners);CHKERRQ(ierr);
2229   ierr = PetscFree(dlens);CHKERRQ(ierr);
2230   ierr = PetscFree(mask);CHKERRQ(ierr);
2231   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2232   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2233   *newmat = A;
2234   PetscFunctionReturn(0);
2235 }
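/*
   Illustrative sketch (hypothetical file name): loading a binary matrix as MPISBAIJ.
   The block size is taken from the -matload_block_size option (default 1); the exact
   name of the read-only file mode flag may differ between PETSc versions.
*/
#if 0
   PetscViewer viewer;
   Mat         A;
   ierr = PetscViewerBinaryOpen(PETSC_COMM_WORLD,"matrix.dat",FILE_MODE_READ,&viewer);CHKERRQ(ierr);
   ierr = MatLoad(viewer,MATMPISBAIJ,&A);CHKERRQ(ierr);
   ierr = PetscViewerDestroy(viewer);CHKERRQ(ierr);
#endif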
2236 
2237 #undef __FUNCT__
2238 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2239 /*XXXXX@
2240    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2241 
2242    Input Parameters:
2243 .  mat  - the matrix
2244 .  fact - factor
2245 
2246    Collective on Mat
2247 
2248    Level: advanced
2249 
2250   Notes:
2251    This can also be set by the command line option: -mat_use_hash_table fact
2252 
2253 .keywords: matrix, hashtable, factor, HT
2254 
2255 .seealso: MatSetOption()
2256 @XXXXX*/
2257 
2258 
2259 #undef __FUNCT__
2260 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
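/*
   The row maxima of the diagonal block are computed by the sequential routine below;
   each stored entry of the off-diagonal block B then contributes both to its local row
   and, because only the upper triangle of the global matrix is kept, to the row owned
   by a later process.  Those remote contributions are accumulated in the 'work' array
   and sent to the higher-rank owners, which merge them into their local result.
*/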
2261 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2262 {
2263   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2264   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2265   PetscReal      atmp;
2266   PetscReal      *work,*svalues,*rvalues;
2267   PetscErrorCode ierr;
2268   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2269   PetscMPIInt    rank,size;
2270   PetscInt       *rowners_bs,dest,count,source;
2271   PetscScalar    *va;
2272   MatScalar      *ba;
2273   MPI_Status     stat;
2274 
2275   PetscFunctionBegin;
2276   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2277   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2278 
2279   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2280   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2281 
2282   bs   = A->bs;
2283   mbs  = a->mbs;
2284   Mbs  = a->Mbs;
2285   ba   = b->a;
2286   bi   = b->i;
2287   bj   = b->j;
2288 
2289   /* find ownerships */
2290   rowners_bs = a->rowners_bs;
2291 
2292   /* each proc creates an array to be distributed */
2293   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2294   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2295 
2296   /* row_max for B */
2297   if (rank != size-1){
2298     for (i=0; i<mbs; i++) {
2299       ncols = bi[1] - bi[0]; bi++;
2300       brow  = bs*i;
2301       for (j=0; j<ncols; j++){
2302         bcol = bs*(*bj);
2303         for (kcol=0; kcol<bs; kcol++){
2304           col = bcol + kcol;                 /* local col index */
2305           col += rowners_bs[rank+1];      /* global col index */
2306           for (krow=0; krow<bs; krow++){
2307             atmp = PetscAbsScalar(*ba); ba++;
2308             row = brow + krow;    /* local row index */
2309             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2310             if (work[col] < atmp) work[col] = atmp;
2311           }
2312         }
2313         bj++;
2314       }
2315     }
2316 
2317     /* send values to its owners */
2318     for (dest=rank+1; dest<size; dest++){
2319       svalues = work + rowners_bs[dest];
2320       count   = rowners_bs[dest+1]-rowners_bs[dest];
2321       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2322     }
2323   }
2324 
2325   /* receive values */
2326   if (rank){
2327     rvalues = work;
2328     count   = rowners_bs[rank+1]-rowners_bs[rank];
2329     for (source=0; source<rank; source++){
2330       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2331       /* process values */
2332       for (i=0; i<count; i++){
2333         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2334       }
2335     }
2336   }
2337 
2338   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2339   ierr = PetscFree(work);CHKERRQ(ierr);
2340   PetscFunctionReturn(0);
2341 }
2342 
2343 #undef __FUNCT__
2344 #define __FUNCT__ "MatRelax_MPISBAIJ"
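/*
   SSOR for the MPI symmetric format.  Each global iteration builds a modified
   right-hand side bb1 that removes both the lower-triangular (B^T*xx) and the
   upper-triangular (B*xx) off-diagonal contributions, using a single forward
   scatter of the merged work vectors slvec0 = [-xx; -B^T*xx] into slvec1 = [bb; 0];
   the sweep itself is then delegated to the sequential relaxation on the
   diagonal block A.
*/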
2345 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2346 {
2347   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2348   PetscErrorCode ierr;
2349   PetscInt       mbs=mat->mbs,bs=matin->bs;
2350   PetscScalar    *x,*b,*ptr,zero=0.0;
2351   Vec            bb1;
2352 
2353   PetscFunctionBegin;
2354   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2355   if (bs > 1)
2356     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2357 
2358   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2359     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2360       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2361       its--;
2362     }
2363 
2364     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2365     while (its--){
2366 
2367       /* lower triangular part: slvec0b = - B^T*xx */
2368       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2369 
2370       /* copy xx into slvec0a */
2371       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2372       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2373       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2374       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2375 
2376       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2377 
2378       /* copy bb into slvec1a */
2379       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2380       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2381       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2382       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2383 
2384       /* set slvec1b = 0 */
2385       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2386 
2387       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2388       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2389       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2390       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2391 
2392       /* upper triangular part: bb1 = bb1 - B*x */
2393       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2394 
2395       /* local diagonal sweep */
2396       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2397     }
2398     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2399   } else {
2400     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2401   }
2402   PetscFunctionReturn(0);
2403 }
2404 
2405 #undef __FUNCT__
2406 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2407 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2408 {
2409   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2410   PetscErrorCode ierr;
2411   Vec            lvec1,bb1;
2412 
2413   PetscFunctionBegin;
2414   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2415   if (matin->bs > 1)
2416     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2417 
2418   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2419     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2420       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2421       its--;
2422     }
2423 
2424     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2425     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2426     while (its--){
2427       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2428 
2429       /* lower diagonal part: bb1 = bb - B^T*xx */
2430       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2431       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2432 
2433       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2434       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2435       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2436 
2437       /* upper diagonal part: bb1 = bb1 - B*x */
2438       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2439       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2440 
2441       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2442 
2443       /* diagonal sweep */
2444       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2445     }
2446     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2447     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2448   } else {
2449     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2450   }
2451   PetscFunctionReturn(0);
2452 }
2453 
2454