xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 2a4c71feefe0dc84b48621684d40b17efe7de161)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPIBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPIBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(a,bs2,nrow,brow,bcol,rmax,aa,ai,aj,a->mbs,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(b,bs2,nrow,brow,bcol,rmax,ba,bi,bj,b->mbs,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPIBAIJ    *b = (Mat_MPIBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     if (b->setvaluescopy) {ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);}
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by user is ingored.
207 */
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPIBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=mat->rmap.rstart;
218   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
219   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc=0;
238   MatScalar     *v_loc=0;
239 
240   PetscFunctionBegin;
241 
242   if(!baij->donotstash){
243     ierr = PetscMalloc(n*sizeof(PetscInt),&in_loc);CHKERRQ(ierr);
244     ierr = PetscMalloc(n*sizeof(MatScalar),&v_loc);CHKERRQ(ierr);
245   }
246 
247   for (i=0; i<m; i++) {
248     if (im[i] < 0) continue;
249 #if defined(PETSC_USE_DEBUG)
250     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
251 #endif
252     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
253       row = im[i] - rstart_orig;              /* local row index */
254       for (j=0; j<n; j++) {
255         if (im[i]/bs > in[j]/bs){
256           if (a->ignore_ltriangular){
257             continue;    /* ignore lower triangular blocks */
258           } else {
259             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. Ignoring these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
260           }
261         }
262         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
263           col = in[j] - cstart_orig;          /* local col index */
264           brow = row/bs; bcol = col/bs;
265           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
266           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
267           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
268           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
269         } else if (in[j] < 0) continue;
270 #if defined(PETSC_USE_DEBUG)
271         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
272 #endif
273         else {  /* off-diag entry (B) */
274           if (mat->was_assembled) {
275             if (!baij->colmap) {
276               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
277             }
278 #if defined (PETSC_USE_CTABLE)
279             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
280             col  = col - 1;
281 #else
282             col = baij->colmap[in[j]/bs] - 1;
283 #endif
284             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
285               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
286               col =  in[j];
287               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
288               B = baij->B;
289               b = (Mat_SeqBAIJ*)(B)->data;
290               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
291               ba=b->a;
292             } else col += in[j]%bs;
293           } else col = in[j];
294           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
295           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
296           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
297         }
298       }
299     } else {  /* off processor entry */
300       if (!baij->donotstash) {
301         n_loc = 0;
302         for (j=0; j<n; j++){
303           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
304           in_loc[n_loc] = in[j];
305           if (roworiented) {
306             v_loc[n_loc] = v[i*n+j];
307           } else {
308             v_loc[n_loc] = v[j*m+i];
309           }
310           n_loc++;
311         }
312         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
313       }
314     }
315   }
316 
317   if(!baij->donotstash){
318     ierr = PetscFree(in_loc);CHKERRQ(ierr);
319     ierr = PetscFree(v_loc);CHKERRQ(ierr);
320   }
321   PetscFunctionReturn(0);
322 }
323 
324 #undef __FUNCT__
325 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
326 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
327 {
328   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
329   const MatScalar *value;
330   MatScalar       *barray=baij->barray;
331   PetscTruth      roworiented = baij->roworiented;
332   PetscErrorCode  ierr;
333   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
334   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
335   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
336 
337   PetscFunctionBegin;
338   if(!barray) {
339     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
340     baij->barray = barray;
341   }
342 
343   if (roworiented) {
344     stepval = (n-1)*bs;
345   } else {
346     stepval = (m-1)*bs;
347   }
348   for (i=0; i<m; i++) {
349     if (im[i] < 0) continue;
350 #if defined(PETSC_USE_DEBUG)
351     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
352 #endif
353     if (im[i] >= rstart && im[i] < rend) {
354       row = im[i] - rstart;
355       for (j=0; j<n; j++) {
356         /* If NumCol = 1 then a copy is not required */
357         if ((roworiented) && (n == 1)) {
358           barray = (MatScalar*) v + i*bs2;
359         } else if((!roworiented) && (m == 1)) {
360           barray = (MatScalar*) v + j*bs2;
361         } else { /* Here a copy is required */
362           if (roworiented) {
363             value = v + i*(stepval+bs)*bs + j*bs;
364           } else {
365             value = v + j*(stepval+bs)*bs + i*bs;
366           }
367           for (ii=0; ii<bs; ii++,value+=stepval) {
368             for (jj=0; jj<bs; jj++) {
369               *barray++  = *value++;
370             }
371           }
372           barray -=bs2;
373         }
374 
375         if (in[j] >= cstart && in[j] < cend){
376           col  = in[j] - cstart;
377           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
378         }
379         else if (in[j] < 0) continue;
380 #if defined(PETSC_USE_DEBUG)
381         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
382 #endif
383         else {
384           if (mat->was_assembled) {
385             if (!baij->colmap) {
386               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
387             }
388 
389 #if defined(PETSC_USE_DEBUG)
390 #if defined (PETSC_USE_CTABLE)
391             { PetscInt data;
392               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
393               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
394             }
395 #else
396             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
397 #endif
398 #endif
399 #if defined (PETSC_USE_CTABLE)
400 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
401             col  = (col - 1)/bs;
402 #else
403             col = (baij->colmap[in[j]] - 1)/bs;
404 #endif
405             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
406               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
407               col =  in[j];
408             }
409           }
410           else col = in[j];
411           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
412         }
413       }
414     } else {
415       if (!baij->donotstash) {
416         if (roworiented) {
417           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
418         } else {
419           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
420         }
421       }
422     }
423   }
424   PetscFunctionReturn(0);
425 }
426 
427 #undef __FUNCT__
428 #define __FUNCT__ "MatGetValues_MPISBAIJ"
429 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
430 {
431   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
432   PetscErrorCode ierr;
433   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
434   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
435 
436   PetscFunctionBegin;
437   for (i=0; i<m; i++) {
438     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
439     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
440     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
441       row = idxm[i] - bsrstart;
442       for (j=0; j<n; j++) {
443         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
444         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
445         if (idxn[j] >= bscstart && idxn[j] < bscend){
446           col = idxn[j] - bscstart;
447           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
448         } else {
449           if (!baij->colmap) {
450             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
451           }
452 #if defined (PETSC_USE_CTABLE)
453           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
454           data --;
455 #else
456           data = baij->colmap[idxn[j]/bs]-1;
457 #endif
458           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
459           else {
460             col  = data + idxn[j]%bs;
461             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
462           }
463         }
464       }
465     } else {
466       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
467     }
468   }
469  PetscFunctionReturn(0);
470 }
471 
472 #undef __FUNCT__
473 #define __FUNCT__ "MatNorm_MPISBAIJ"
474 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
475 {
476   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
477   PetscErrorCode ierr;
478   PetscReal      sum[2],*lnorm2;
479 
480   PetscFunctionBegin;
481   if (baij->size == 1) {
482     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
483   } else {
484     if (type == NORM_FROBENIUS) {
485       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
486       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
487       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* squar power of norm(A) */
488       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
489       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* squar power of norm(B) */
490       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
491       *norm = sqrt(sum[0] + 2*sum[1]);
492       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
493     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
494       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
495       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
496       PetscReal    *rsum,*rsum2,vabs;
497       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
498       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
499       MatScalar    *v;
500 
501       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
502       rsum2 = rsum + mat->cmap.N;
503       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
504       /* Amat */
505       v = amat->a; jj = amat->j;
506       for (brow=0; brow<mbs; brow++) {
507         grow = bs*(rstart + brow);
508         nz = amat->i[brow+1] - amat->i[brow];
509         for (bcol=0; bcol<nz; bcol++){
510           gcol = bs*(rstart + *jj); jj++;
511           for (col=0; col<bs; col++){
512             for (row=0; row<bs; row++){
513               vabs = PetscAbsScalar(*v); v++;
514               rsum[gcol+col] += vabs;
515               /* non-diagonal block */
516               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
517             }
518           }
519         }
520       }
521       /* Bmat */
522       v = bmat->a; jj = bmat->j;
523       for (brow=0; brow<mbs; brow++) {
524         grow = bs*(rstart + brow);
525         nz = bmat->i[brow+1] - bmat->i[brow];
526         for (bcol=0; bcol<nz; bcol++){
527           gcol = bs*garray[*jj]; jj++;
528           for (col=0; col<bs; col++){
529             for (row=0; row<bs; row++){
530               vabs = PetscAbsScalar(*v); v++;
531               rsum[gcol+col] += vabs;
532               rsum[grow+row] += vabs;
533             }
534           }
535         }
536       }
537       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
538       *norm = 0.0;
539       for (col=0; col<mat->cmap.N; col++) {
540         if (rsum2[col] > *norm) *norm = rsum2[col];
541       }
542       ierr = PetscFree(rsum);CHKERRQ(ierr);
543     } else {
544       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
545     }
546   }
547   PetscFunctionReturn(0);
548 }
549 
550 #undef __FUNCT__
551 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
552 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
553 {
554   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
555   PetscErrorCode ierr;
556   PetscInt       nstash,reallocs;
557   InsertMode     addv;
558 
559   PetscFunctionBegin;
560   if (baij->donotstash) {
561     PetscFunctionReturn(0);
562   }
563 
564   /* make sure all processors are either in INSERTMODE or ADDMODE */
565   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
566   if (addv == (ADD_VALUES|INSERT_VALUES)) {
567     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
568   }
569   mat->insertmode = addv; /* in case this processor had no cache */
570 
571   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
572   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
573   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
574   ierr = PetscInfo2(0,"Stash has %D entries,uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
575   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
576   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
577   PetscFunctionReturn(0);
578 }
579 
580 #undef __FUNCT__
581 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
582 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
583 {
584   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
585   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
586   Mat_SeqBAIJ    *b=(Mat_SeqBAIJ*)baij->B->data;
587   PetscErrorCode ierr;
588   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
589   PetscInt       *row,*col,other_disassembled;
590   PetscMPIInt    n;
591   PetscTruth     r1,r2,r3;
592   MatScalar      *val;
593   InsertMode     addv = mat->insertmode;
594 
595   PetscFunctionBegin;
596 
597   if (!baij->donotstash) {
598     while (1) {
599       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
600       if (!flg) break;
601 
602       for (i=0; i<n;) {
603         /* Now identify the consecutive vals belonging to the same row */
604         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
605         if (j < n) ncols = j-i;
606         else       ncols = n-i;
607         /* Now assemble all these values with a single function call */
608         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
609         i = j;
610       }
611     }
612     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
613     /* Now process the block-stash. Since the values are stashed column-oriented,
614        set the roworiented flag to column oriented, and after MatSetValues()
615        restore the original flags */
616     r1 = baij->roworiented;
617     r2 = a->roworiented;
618     r3 = b->roworiented;
619     baij->roworiented = PETSC_FALSE;
620     a->roworiented    = PETSC_FALSE;
621     b->roworiented    = PETSC_FALSE;
622     while (1) {
623       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
624       if (!flg) break;
625 
626       for (i=0; i<n;) {
627         /* Now identify the consecutive vals belonging to the same row */
628         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
629         if (j < n) ncols = j-i;
630         else       ncols = n-i;
631         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
632         i = j;
633       }
634     }
635     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
636     baij->roworiented = r1;
637     a->roworiented    = r2;
638     b->roworiented    = r3;
639   }
640 
641   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
642   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
643 
644   /* determine if any processor has disassembled, if so we must
645      also disassemble ourselfs, in order that we may reassemble. */
646   /*
647      if nonzero structure of submatrix B cannot change then we know that
648      no processor disassembled thus we can skip this stuff
649   */
650   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
651     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
652     if (mat->was_assembled && !other_disassembled) {
653       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
654     }
655   }
656 
657   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
658     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
659   }
660   b->compressedrow.use = PETSC_TRUE;
661   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
662   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
663 
664   if (baij->rowvalues) {
665     ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
666     baij->rowvalues = 0;
667   }
668 
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
675 {
676   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
677   PetscErrorCode    ierr;
678   PetscInt          bs = mat->rmap.bs;
679   PetscMPIInt       size = baij->size,rank = baij->rank;
680   PetscTruth        iascii,isdraw;
681   PetscViewer       sviewer;
682   PetscViewerFormat format;
683 
684   PetscFunctionBegin;
685   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
687   if (iascii) {
688     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
689     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
690       MatInfo info;
691       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
692       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
694               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
695               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
697       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
698       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
699       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
700       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
702       PetscFunctionReturn(0);
703     } else if (format == PETSC_VIEWER_ASCII_INFO) {
704       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
705       PetscFunctionReturn(0);
706     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
707       PetscFunctionReturn(0);
708     }
709   }
710 
711   if (isdraw) {
712     PetscDraw       draw;
713     PetscTruth isnull;
714     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
715     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
716   }
717 
718   if (size == 1) {
719     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
720     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
721   } else {
722     /* assemble the entire matrix onto first processor. */
723     Mat         A;
724     Mat_SeqSBAIJ *Aloc;
725     Mat_SeqBAIJ *Bloc;
726     PetscInt         M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
727     MatScalar   *a;
728 
729     /* Should this be the same type as mat? */
730     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
733     } else {
734       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
735     }
736     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
737     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
738     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
739 
740     /* copy over the A part */
741     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
742     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
743     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
744 
745     for (i=0; i<mbs; i++) {
746       rvals[0] = mat->rmap.rstart + bs*i;
747       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
748       for (j=ai[i]; j<ai[i+1]; j++) {
749         col = mat->cmap.rstart+aj[j]*bs;
750         for (k=0; k<bs; k++) {
751           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
752           col++; a += bs;
753         }
754       }
755     }
756     /* copy over the B part */
757     Bloc = (Mat_SeqBAIJ*)baij->B->data;
758     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
759     for (i=0; i<mbs; i++) {
760       rvals[0] = mat->rmap.rstart + bs;
761       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
762       for (j=ai[i]; j<ai[i+1]; j++) {
763         col = baij->garray[aj[j]]*bs;
764         for (k=0; k<bs; k++) {
765           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
766           col++; a += bs;
767         }
768       }
769     }
770     ierr = PetscFree(rvals);CHKERRQ(ierr);
771     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
772     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
773     /*
774        Everyone has to call to draw the matrix since the graphics waits are
775        synchronized across all processors that share the PetscDraw object
776     */
777     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
778     if (!rank) {
779       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
780       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
781     }
782     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
783     ierr = MatDestroy(A);CHKERRQ(ierr);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatView_MPISBAIJ"
790 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
791 {
792   PetscErrorCode ierr;
793   PetscTruth     iascii,isdraw,issocket,isbinary;
794 
795   PetscFunctionBegin;
796   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
797   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
798   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
800   if (iascii || isdraw || issocket || isbinary) {
801     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
802   } else {
803     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
804   }
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatDestroy_MPISBAIJ"
810 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
811 {
812   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816 #if defined(PETSC_USE_LOG)
817   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
818 #endif
819   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
820   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
821   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
822   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
823 #if defined (PETSC_USE_CTABLE)
824   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
825 #else
826   if (baij->colmap) {ierr = PetscFree(baij->colmap);CHKERRQ(ierr);}
827 #endif
828   if (baij->garray) {ierr = PetscFree(baij->garray);CHKERRQ(ierr);}
829   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
830   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
831   if (baij->slvec0) {
832     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
834   }
835   if (baij->slvec1) {
836     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
837     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
838     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
839   }
840   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
841   if (baij->rowvalues) {ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);}
842   if (baij->barray) {ierr = PetscFree(baij->barray);CHKERRQ(ierr);}
843   if (baij->hd) {ierr = PetscFree(baij->hd);CHKERRQ(ierr);}
844 #if defined(PETSC_USE_MAT_SINGLE)
845   if (baij->setvaluescopy) {ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);}
846 #endif
847   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
848   ierr = PetscFree(baij);CHKERRQ(ierr);
849 
850   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
851   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
852   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
854   PetscFunctionReturn(0);
855 }
856 
857 #undef __FUNCT__
858 #define __FUNCT__ "MatMult_MPISBAIJ"
859 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
860 {
861   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
862   PetscErrorCode ierr;
863   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
864   PetscScalar    *x,*from,zero=0.0;
865 
866   PetscFunctionBegin;
867   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
868   if (nt != A->cmap.n) {
869     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
870   }
871 
872   /* diagonal part */
873   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
874   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
875 
876   /* subdiagonal part */
877   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
878   CHKMEMQ;
879   /* copy x into the vec slvec0 */
880   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
881   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
882   CHKMEMQ;
883   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
884   CHKMEMQ;
885   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
886 
887   CHKMEMQ;
888   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
889   CHKMEMQ;
890   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
891   CHKMEMQ;
892   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
893     CHKMEMQ;
894   /* supperdiagonal part */
895   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
896     CHKMEMQ;
897   PetscFunctionReturn(0);
898 }
899 
900 #undef __FUNCT__
901 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
902 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
903 {
904   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
905   PetscErrorCode ierr;
906   PetscInt       nt;
907 
908   PetscFunctionBegin;
909   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
910   if (nt != A->cmap.n) {
911     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
912   }
913   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
914   if (nt != A->rmap.N) {
915     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible parition of A and yy");
916   }
917 
918   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
919   /* do diagonal part */
920   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
921   /* do supperdiagonal part */
922   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
923   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
924   /* do subdiagonal part */
925   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
926   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
927   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
928 
929   PetscFunctionReturn(0);
930 }
931 
932 #undef __FUNCT__
933 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
934 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
935 {
936   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
937   PetscErrorCode ierr;
938   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
939   PetscScalar    *x,*from,zero=0.0;
940 
941   PetscFunctionBegin;
942   /*
943   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
944   PetscSynchronizedFlush(A->comm);
945   */
946   /* diagonal part */
947   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
948   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
949 
950   /* subdiagonal part */
951   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
952 
953   /* copy x into the vec slvec0 */
954   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
955   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
956   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
957   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
958 
959   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
960   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
961   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
962 
963   /* supperdiagonal part */
964   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
965 
966   PetscFunctionReturn(0);
967 }
968 
969 #undef __FUNCT__
970 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
971 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
972 {
973   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
974   PetscErrorCode ierr;
975 
976   PetscFunctionBegin;
977   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
978   /* do diagonal part */
979   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
980   /* do supperdiagonal part */
981   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
982   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
983 
984   /* do subdiagonal part */
985   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
986   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
987   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
988 
989   PetscFunctionReturn(0);
990 }
991 
992 /*
993   This only works correctly for square matrices where the subblock A->A is the
994    diagonal block
995 */
996 #undef __FUNCT__
997 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
998 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
999 {
1000   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1001   PetscErrorCode ierr;
1002 
1003   PetscFunctionBegin;
1004   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1005   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1006   PetscFunctionReturn(0);
1007 }
1008 
1009 #undef __FUNCT__
1010 #define __FUNCT__ "MatScale_MPISBAIJ"
1011 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1012 {
1013   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1014   PetscErrorCode ierr;
1015 
1016   PetscFunctionBegin;
1017   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1018   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1019   PetscFunctionReturn(0);
1020 }
1021 
1022 #undef __FUNCT__
1023 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1024 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1025 {
1026   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1027   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1028   PetscErrorCode ierr;
1029   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1030   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1031   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1032 
1033   PetscFunctionBegin;
1034   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1035   mat->getrowactive = PETSC_TRUE;
1036 
1037   if (!mat->rowvalues && (idx || v)) {
1038     /*
1039         allocate enough space to hold information from the longest row.
1040     */
1041     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1042     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1043     PetscInt     max = 1,mbs = mat->mbs,tmp;
1044     for (i=0; i<mbs; i++) {
1045       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1046       if (max < tmp) { max = tmp; }
1047     }
1048     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1049     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1050   }
1051 
1052   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1053   lrow = row - brstart;  /* local row index */
1054 
1055   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1056   if (!v)   {pvA = 0; pvB = 0;}
1057   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1058   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1059   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1060   nztot = nzA + nzB;
1061 
1062   cmap  = mat->garray;
1063   if (v  || idx) {
1064     if (nztot) {
1065       /* Sort by increasing column numbers, assuming A and B already sorted */
1066       PetscInt imark = -1;
1067       if (v) {
1068         *v = v_p = mat->rowvalues;
1069         for (i=0; i<nzB; i++) {
1070           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1071           else break;
1072         }
1073         imark = i;
1074         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1075         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1076       }
1077       if (idx) {
1078         *idx = idx_p = mat->rowindices;
1079         if (imark > -1) {
1080           for (i=0; i<imark; i++) {
1081             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1082           }
1083         } else {
1084           for (i=0; i<nzB; i++) {
1085             if (cmap[cworkB[i]/bs] < cstart)
1086               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1087             else break;
1088           }
1089           imark = i;
1090         }
1091         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1092         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1093       }
1094     } else {
1095       if (idx) *idx = 0;
1096       if (v)   *v   = 0;
1097     }
1098   }
1099   *nz = nztot;
1100   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1101   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1102   PetscFunctionReturn(0);
1103 }
1104 
1105 #undef __FUNCT__
1106 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1107 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1108 {
1109   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1110 
1111   PetscFunctionBegin;
1112   if (!baij->getrowactive) {
1113     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1114   }
1115   baij->getrowactive = PETSC_FALSE;
1116   PetscFunctionReturn(0);
1117 }
1118 
1119 #undef __FUNCT__
1120 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1121 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1122 {
1123   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1124   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1125 
1126   PetscFunctionBegin;
1127   aA->getrow_utriangular = PETSC_TRUE;
1128   PetscFunctionReturn(0);
1129 }
1130 #undef __FUNCT__
1131 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1132 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1133 {
1134   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1135   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1136 
1137   PetscFunctionBegin;
1138   aA->getrow_utriangular = PETSC_FALSE;
1139   PetscFunctionReturn(0);
1140 }
1141 
1142 #undef __FUNCT__
1143 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1144 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1145 {
1146   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1147   PetscErrorCode ierr;
1148 
1149   PetscFunctionBegin;
1150   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1151   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1152   PetscFunctionReturn(0);
1153 }
1154 
1155 #undef __FUNCT__
1156 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1157 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1158 {
1159   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1160   PetscErrorCode ierr;
1161 
1162   PetscFunctionBegin;
1163   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1164   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1165   PetscFunctionReturn(0);
1166 }
1167 
1168 #undef __FUNCT__
1169 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1170 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1171 {
1172   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1173   PetscErrorCode ierr;
1174 
1175   PetscFunctionBegin;
1176   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1177   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1178   PetscFunctionReturn(0);
1179 }
1180 
1181 #undef __FUNCT__
1182 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1183 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1184 {
1185   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1186   Mat            A = a->A,B = a->B;
1187   PetscErrorCode ierr;
1188   PetscReal      isend[5],irecv[5];
1189 
1190   PetscFunctionBegin;
1191   info->block_size     = (PetscReal)matin->rmap.bs;
1192   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1193   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1194   isend[3] = info->memory;  isend[4] = info->mallocs;
1195   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1196   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1197   isend[3] += info->memory;  isend[4] += info->mallocs;
1198   if (flag == MAT_LOCAL) {
1199     info->nz_used      = isend[0];
1200     info->nz_allocated = isend[1];
1201     info->nz_unneeded  = isend[2];
1202     info->memory       = isend[3];
1203     info->mallocs      = isend[4];
1204   } else if (flag == MAT_GLOBAL_MAX) {
1205     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1206     info->nz_used      = irecv[0];
1207     info->nz_allocated = irecv[1];
1208     info->nz_unneeded  = irecv[2];
1209     info->memory       = irecv[3];
1210     info->mallocs      = irecv[4];
1211   } else if (flag == MAT_GLOBAL_SUM) {
1212     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1213     info->nz_used      = irecv[0];
1214     info->nz_allocated = irecv[1];
1215     info->nz_unneeded  = irecv[2];
1216     info->memory       = irecv[3];
1217     info->mallocs      = irecv[4];
1218   } else {
1219     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1220   }
1221   info->rows_global       = (PetscReal)A->rmap.N;
1222   info->columns_global    = (PetscReal)A->cmap.N;
1223   info->rows_local        = (PetscReal)A->rmap.N;
1224   info->columns_local     = (PetscReal)A->cmap.N;
1225   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1226   info->fill_ratio_needed = 0;
1227   info->factor_mallocs    = 0;
1228   PetscFunctionReturn(0);
1229 }
1230 
1231 #undef __FUNCT__
1232 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1233 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1234 {
1235   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1236   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1237   PetscErrorCode ierr;
1238 
1239   PetscFunctionBegin;
1240   switch (op) {
1241   case MAT_NO_NEW_NONZERO_LOCATIONS:
1242   case MAT_YES_NEW_NONZERO_LOCATIONS:
1243   case MAT_COLUMNS_UNSORTED:
1244   case MAT_COLUMNS_SORTED:
1245   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1246   case MAT_KEEP_ZEROED_ROWS:
1247   case MAT_NEW_NONZERO_LOCATION_ERR:
1248     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1249     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1250     break;
1251   case MAT_ROW_ORIENTED:
1252     a->roworiented = PETSC_TRUE;
1253     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1254     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1255     break;
1256   case MAT_ROWS_SORTED:
1257   case MAT_ROWS_UNSORTED:
1258   case MAT_YES_NEW_DIAGONALS:
1259     ierr = PetscInfo(A,"Option ignored\n");CHKERRQ(ierr);
1260     break;
1261   case MAT_COLUMN_ORIENTED:
1262     a->roworiented = PETSC_FALSE;
1263     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1264     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1265     break;
1266   case MAT_IGNORE_OFF_PROC_ENTRIES:
1267     a->donotstash = PETSC_TRUE;
1268     break;
1269   case MAT_NO_NEW_DIAGONALS:
1270     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1271   case MAT_USE_HASH_TABLE:
1272     a->ht_flag = PETSC_TRUE;
1273     break;
1274   case MAT_NOT_SYMMETRIC:
1275   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1276   case MAT_HERMITIAN:
1277     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1278   case MAT_SYMMETRIC:
1279   case MAT_STRUCTURALLY_SYMMETRIC:
1280   case MAT_NOT_HERMITIAN:
1281   case MAT_SYMMETRY_ETERNAL:
1282   case MAT_NOT_SYMMETRY_ETERNAL:
1283     break;
1284   case MAT_IGNORE_LOWER_TRIANGULAR:
1285     aA->ignore_ltriangular = PETSC_TRUE;
1286     break;
1287   case MAT_ERROR_LOWER_TRIANGULAR:
1288     aA->ignore_ltriangular = PETSC_FALSE;
1289     break;
1290   case MAT_GETROW_UPPERTRIANGULAR:
1291     aA->getrow_utriangular = PETSC_TRUE;
1292     break;
1293   default:
1294     SETERRQ(PETSC_ERR_SUP,"unknown option");
1295   }
1296   PetscFunctionReturn(0);
1297 }
1298 
1299 #undef __FUNCT__
1300 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1301 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1302 {
1303   PetscErrorCode ierr;
1304   PetscFunctionBegin;
1305   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1306   PetscFunctionReturn(0);
1307 }
1308 
1309 #undef __FUNCT__
1310 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1311 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1312 {
1313   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1314   Mat            a=baij->A, b=baij->B;
1315   PetscErrorCode ierr;
1316   PetscInt       nv,m,n;
1317   PetscTruth     flg;
1318 
1319   PetscFunctionBegin;
1320   if (ll != rr){
1321     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1322     if (!flg)
1323       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be same\n");
1324   }
1325   if (!ll) PetscFunctionReturn(0);
1326 
1327   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1328   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local size %d %d must be same",m,n);
1329 
1330   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1331   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1332 
1333   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1334 
1335   /* left diagonalscale the off-diagonal part */
1336   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1337 
1338   /* scale the diagonal part */
1339   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1340 
1341   /* right diagonalscale the off-diagonal part */
1342   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1343   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1344   PetscFunctionReturn(0);
1345 }
1346 
1347 #undef __FUNCT__
1348 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1349 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1350 {
1351   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1352   MPI_Comm          comm = A->comm;
1353   static PetscTruth called = PETSC_FALSE;
1354   PetscErrorCode    ierr;
1355 
1356   PetscFunctionBegin;
1357   if (!a->rank) {
1358     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1359   }
1360   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1361   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1362   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1363   PetscFunctionReturn(0);
1364 }
1365 
1366 #undef __FUNCT__
1367 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1368 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1369 {
1370   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1371   PetscErrorCode ierr;
1372 
1373   PetscFunctionBegin;
1374   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1375   PetscFunctionReturn(0);
1376 }
1377 
1378 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1379 
1380 #undef __FUNCT__
1381 #define __FUNCT__ "MatEqual_MPISBAIJ"
1382 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1383 {
1384   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1385   Mat            a,b,c,d;
1386   PetscTruth     flg;
1387   PetscErrorCode ierr;
1388 
1389   PetscFunctionBegin;
1390   a = matA->A; b = matA->B;
1391   c = matB->A; d = matB->B;
1392 
1393   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1394   if (flg) {
1395     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1396   }
1397   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1398   PetscFunctionReturn(0);
1399 }
1400 
1401 #undef __FUNCT__
1402 #define __FUNCT__ "MatCopy_MPISBAIJ"
1403 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1404 {
1405   PetscErrorCode ierr;
1406   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1407   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1408 
1409   PetscFunctionBegin;
1410   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1411   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1412     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1413     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1414     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1415   } else {
1416     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1417     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1418   }
1419   PetscFunctionReturn(0);
1420 }
1421 
1422 #undef __FUNCT__
1423 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1424 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1425 {
1426   PetscErrorCode ierr;
1427 
1428   PetscFunctionBegin;
1429   ierr = MatMPISBAIJSetPreallocation(A,1,PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1430   PetscFunctionReturn(0);
1431 }
1432 
1433 #include "petscblaslapack.h"
1434 #undef __FUNCT__
1435 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1436 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1437 {
1438   PetscErrorCode ierr;
1439   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1440   PetscBLASInt   bnz,one=1;
1441   Mat_SeqSBAIJ   *xa,*ya;
1442   Mat_SeqBAIJ    *xb,*yb;
1443 
1444   PetscFunctionBegin;
1445   if (str == SAME_NONZERO_PATTERN) {
1446     PetscScalar alpha = a;
1447     xa = (Mat_SeqSBAIJ *)xx->A->data;
1448     ya = (Mat_SeqSBAIJ *)yy->A->data;
1449     bnz = (PetscBLASInt)xa->nz;
1450     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1451     xb = (Mat_SeqBAIJ *)xx->B->data;
1452     yb = (Mat_SeqBAIJ *)yy->B->data;
1453     bnz = (PetscBLASInt)xb->nz;
1454     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1455   } else {
1456     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1457     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1458     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1459   }
1460   PetscFunctionReturn(0);
1461 }
1462 
1463 #undef __FUNCT__
1464 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1465 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1466 {
1467   PetscErrorCode ierr;
1468   PetscInt       i;
1469   PetscTruth     flg;
1470 
1471   PetscFunctionBegin;
1472   for (i=0; i<n; i++) {
1473     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1474     if (!flg) {
1475       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1476     }
1477   }
1478   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1479   PetscFunctionReturn(0);
1480 }
1481 
1482 
1483 /* -------------------------------------------------------------------*/
1484 static struct _MatOps MatOps_Values = {
1485        MatSetValues_MPISBAIJ,
1486        MatGetRow_MPISBAIJ,
1487        MatRestoreRow_MPISBAIJ,
1488        MatMult_MPISBAIJ,
1489 /* 4*/ MatMultAdd_MPISBAIJ,
1490        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1491        MatMultAdd_MPISBAIJ,
1492        0,
1493        0,
1494        0,
1495 /*10*/ 0,
1496        0,
1497        0,
1498        MatRelax_MPISBAIJ,
1499        MatTranspose_MPISBAIJ,
1500 /*15*/ MatGetInfo_MPISBAIJ,
1501        MatEqual_MPISBAIJ,
1502        MatGetDiagonal_MPISBAIJ,
1503        MatDiagonalScale_MPISBAIJ,
1504        MatNorm_MPISBAIJ,
1505 /*20*/ MatAssemblyBegin_MPISBAIJ,
1506        MatAssemblyEnd_MPISBAIJ,
1507        0,
1508        MatSetOption_MPISBAIJ,
1509        MatZeroEntries_MPISBAIJ,
1510 /*25*/ 0,
1511        0,
1512        0,
1513        0,
1514        0,
1515 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1516        0,
1517        0,
1518        0,
1519        0,
1520 /*35*/ MatDuplicate_MPISBAIJ,
1521        0,
1522        0,
1523        0,
1524        0,
1525 /*40*/ MatAXPY_MPISBAIJ,
1526        MatGetSubMatrices_MPISBAIJ,
1527        MatIncreaseOverlap_MPISBAIJ,
1528        MatGetValues_MPISBAIJ,
1529        MatCopy_MPISBAIJ,
1530 /*45*/ MatPrintHelp_MPISBAIJ,
1531        MatScale_MPISBAIJ,
1532        0,
1533        0,
1534        0,
1535 /*50*/ 0,
1536        0,
1537        0,
1538        0,
1539        0,
1540 /*55*/ 0,
1541        0,
1542        MatSetUnfactored_MPISBAIJ,
1543        0,
1544        MatSetValuesBlocked_MPISBAIJ,
1545 /*60*/ 0,
1546        0,
1547        0,
1548        0,
1549        0,
1550 /*65*/ 0,
1551        0,
1552        0,
1553        0,
1554        0,
1555 /*70*/ MatGetRowMax_MPISBAIJ,
1556        0,
1557        0,
1558        0,
1559        0,
1560 /*75*/ 0,
1561        0,
1562        0,
1563        0,
1564        0,
1565 /*80*/ 0,
1566        0,
1567        0,
1568        0,
1569        MatLoad_MPISBAIJ,
1570 /*85*/ 0,
1571        0,
1572        0,
1573        0,
1574        0,
1575 /*90*/ 0,
1576        0,
1577        0,
1578        0,
1579        0,
1580 /*95*/ 0,
1581        0,
1582        0,
1583        0,
1584        0,
1585 /*100*/0,
1586        0,
1587        0,
1588        0,
1589        0,
1590 /*105*/0,
1591        MatRealPart_MPISBAIJ,
1592        MatImaginaryPart_MPISBAIJ,
1593        MatGetRowUpperTriangular_MPISBAIJ,
1594        MatRestoreRowUpperTriangular_MPISBAIJ
1595 };
1596 
1597 
1598 EXTERN_C_BEGIN
1599 #undef __FUNCT__
1600 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1601 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1602 {
1603   PetscFunctionBegin;
1604   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1605   *iscopy = PETSC_FALSE;
1606   PetscFunctionReturn(0);
1607 }
1608 EXTERN_C_END
1609 
1610 EXTERN_C_BEGIN
1611 #undef __FUNCT__
1612 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1613 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1614 {
1615   Mat_MPISBAIJ   *b;
1616   PetscErrorCode ierr;
1617   PetscInt       i,mbs,Mbs;
1618 
1619   PetscFunctionBegin;
1620   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1621 
1622   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1623   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1624   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1625   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1626   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1627 
1628   B->rmap.bs = B->cmap.bs = bs;
1629   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1630   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1631 
1632   if (d_nnz) {
1633     for (i=0; i<B->rmap.n/bs; i++) {
1634       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than -1: local row %D value %D",i,d_nnz[i]);
1635     }
1636   }
1637   if (o_nnz) {
1638     for (i=0; i<B->rmap.n/bs; i++) {
1639       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than -1: local row %D value %D",i,o_nnz[i]);
1640     }
1641   }
1642   B->preallocated = PETSC_TRUE;
1643 
1644   b   = (Mat_MPISBAIJ*)B->data;
1645   mbs = B->rmap.n/bs;
1646   Mbs = B->rmap.N/bs;
1647   if (mbs*bs != B->rmap.n) {
1648     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.N,bs);
1649   }
1650 
1651   B->rmap.bs  = bs;
1652   b->bs2 = bs*bs;
1653   b->mbs = mbs;
1654   b->nbs = mbs;
1655   b->Mbs = Mbs;
1656   b->Nbs = Mbs;
1657 
1658   for (i=0; i<=b->size; i++) {
1659     b->rangebs[i] = B->rmap.range[i]/bs;
1660   }
1661   b->rstartbs = B->rmap.rstart/bs;
1662   b->rendbs   = B->rmap.rend/bs;
1663 
1664   b->cstartbs = B->cmap.rstart/bs;
1665   b->cendbs   = B->cmap.rend/bs;
1666 
1667   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1668   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1669   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1670   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1671   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1672 
1673   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1674   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1675   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1676   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1677   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1678 
1679   /* build cache for off array entries formed */
1680   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1681 
1682   PetscFunctionReturn(0);
1683 }
1684 EXTERN_C_END
1685 
1686 /*MC
1687    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1688    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1689 
1690    Options Database Keys:
1691 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1692 
1693   Level: beginner
1694 
1695 .seealso: MatCreateMPISBAIJ
1696 M*/
1697 
1698 EXTERN_C_BEGIN
1699 #undef __FUNCT__
1700 #define __FUNCT__ "MatCreate_MPISBAIJ"
1701 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1702 {
1703   Mat_MPISBAIJ   *b;
1704   PetscErrorCode ierr;
1705   PetscTruth     flg;
1706 
1707   PetscFunctionBegin;
1708 
1709   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1710   B->data = (void*)b;
1711   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1712 
1713   B->ops->destroy    = MatDestroy_MPISBAIJ;
1714   B->ops->view       = MatView_MPISBAIJ;
1715   B->mapping    = 0;
1716   B->factor     = 0;
1717   B->assembled  = PETSC_FALSE;
1718 
1719   B->insertmode = NOT_SET_VALUES;
1720   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1721   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1722 
1723   /* build local table of row and column ownerships */
1724   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1725 
1726   /* build cache for off array entries formed */
1727   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1728   b->donotstash  = PETSC_FALSE;
1729   b->colmap      = PETSC_NULL;
1730   b->garray      = PETSC_NULL;
1731   b->roworiented = PETSC_TRUE;
1732 
1733 #if defined(PETSC_USE_MAT_SINGLE)
1734   /* stuff for MatSetValues_XXX in single precision */
1735   b->setvalueslen     = 0;
1736   b->setvaluescopy    = PETSC_NULL;
1737 #endif
1738 
1739   /* stuff used in block assembly */
1740   b->barray       = 0;
1741 
1742   /* stuff used for matrix vector multiply */
1743   b->lvec         = 0;
1744   b->Mvctx        = 0;
1745   b->slvec0       = 0;
1746   b->slvec0b      = 0;
1747   b->slvec1       = 0;
1748   b->slvec1a      = 0;
1749   b->slvec1b      = 0;
1750   b->sMvctx       = 0;
1751 
1752   /* stuff for MatGetRow() */
1753   b->rowindices   = 0;
1754   b->rowvalues    = 0;
1755   b->getrowactive = PETSC_FALSE;
1756 
1757   /* hash table stuff */
1758   b->ht           = 0;
1759   b->hd           = 0;
1760   b->ht_size      = 0;
1761   b->ht_flag      = PETSC_FALSE;
1762   b->ht_fact      = 0;
1763   b->ht_total_ct  = 0;
1764   b->ht_insert_ct = 0;
1765 
1766   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1767   if (flg) {
1768     PetscReal fact = 1.39;
1769     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1770     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1771     if (fact <= 1.0) fact = 1.39;
1772     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1773     ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1774   }
1775   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1776                                      "MatStoreValues_MPISBAIJ",
1777                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1778   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1779                                      "MatRetrieveValues_MPISBAIJ",
1780                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1781   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1782                                      "MatGetDiagonalBlock_MPISBAIJ",
1783                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1784   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1785                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1786                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1787   B->symmetric                  = PETSC_TRUE;
1788   B->structurally_symmetric     = PETSC_TRUE;
1789   B->symmetric_set              = PETSC_TRUE;
1790   B->structurally_symmetric_set = PETSC_TRUE;
1791   PetscFunctionReturn(0);
1792 }
1793 EXTERN_C_END
1794 
1795 /*MC
1796    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1797 
1798    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1799    and MATMPISBAIJ otherwise.
1800 
1801    Options Database Keys:
1802 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1803 
1804   Level: beginner
1805 
1806 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1807 M*/
1808 
1809 EXTERN_C_BEGIN
1810 #undef __FUNCT__
1811 #define __FUNCT__ "MatCreate_SBAIJ"
1812 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1813 {
1814   PetscErrorCode ierr;
1815   PetscMPIInt    size;
1816 
1817   PetscFunctionBegin;
1818   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1819   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1820   if (size == 1) {
1821     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1822   } else {
1823     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1824   }
1825   PetscFunctionReturn(0);
1826 }
1827 EXTERN_C_END
1828 
1829 #undef __FUNCT__
1830 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1831 /*@C
1832    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1833    the user should preallocate the matrix storage by setting the parameters
1834    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1835    performance can be increased by more than a factor of 50.
1836 
1837    Collective on Mat
1838 
1839    Input Parameters:
1840 +  A - the matrix
1841 .  bs   - size of blockk
1842 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1843            submatrix  (same for all local rows)
1844 .  d_nnz - array containing the number of block nonzeros in the various block rows
1845            in the upper triangular and diagonal part of the in diagonal portion of the local
1846            (possibly different for each block row) or PETSC_NULL.  You must leave room
1847            for the diagonal entry even if it is zero.
1848 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1849            submatrix (same for all local rows).
1850 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1851            off-diagonal portion of the local submatrix (possibly different for
1852            each block row) or PETSC_NULL.
1853 
1854 
1855    Options Database Keys:
1856 .   -mat_no_unroll - uses code that does not unroll the loops in the
1857                      block calculations (much slower)
1858 .   -mat_block_size - size of the blocks to use
1859 
1860    Notes:
1861 
1862    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1863    than it must be used on all processors that share the object for that argument.
1864 
1865    If the *_nnz parameter is given then the *_nz parameter is ignored
1866 
1867    Storage Information:
1868    For a square global matrix we define each processor's diagonal portion
1869    to be its local rows and the corresponding columns (a square submatrix);
1870    each processor's off-diagonal portion encompasses the remainder of the
1871    local matrix (a rectangular submatrix).
1872 
1873    The user can specify preallocated storage for the diagonal part of
1874    the local submatrix with either d_nz or d_nnz (not both).  Set
1875    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1876    memory allocation.  Likewise, specify preallocated storage for the
1877    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1878 
1879    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1880    the figure below we depict these three local rows and all columns (0-11).
1881 
1882 .vb
1883            0 1 2 3 4 5 6 7 8 9 10 11
1884           -------------------
1885    row 3  |  o o o d d d o o o o o o
1886    row 4  |  o o o d d d o o o o o o
1887    row 5  |  o o o d d d o o o o o o
1888           -------------------
1889 .ve
1890 
1891    Thus, any entries in the d locations are stored in the d (diagonal)
1892    submatrix, and any entries in the o locations are stored in the
1893    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1894    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
1895 
1896    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1897    plus the diagonal part of the d matrix,
1898    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1899    In general, for PDE problems in which most nonzeros are near the diagonal,
1900    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1901    or you will get TERRIBLE performance; see the users' manual chapter on
1902    matrices.
1903 
1904    Level: intermediate
1905 
1906 .keywords: matrix, block, aij, compressed row, sparse, parallel
1907 
1908 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1909 @*/
1910 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1911 {
1912   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1913 
1914   PetscFunctionBegin;
1915   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1916   if (f) {
1917     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1918   }
1919   PetscFunctionReturn(0);
1920 }
1921 
1922 #undef __FUNCT__
1923 #define __FUNCT__ "MatCreateMPISBAIJ"
1924 /*@C
1925    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1926    (block compressed row).  For good matrix assembly performance
1927    the user should preallocate the matrix storage by setting the parameters
1928    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1929    performance can be increased by more than a factor of 50.
1930 
1931    Collective on MPI_Comm
1932 
1933    Input Parameters:
1934 +  comm - MPI communicator
1935 .  bs   - size of blockk
1936 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1937            This value should be the same as the local size used in creating the
1938            y vector for the matrix-vector product y = Ax.
1939 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1940            This value should be the same as the local size used in creating the
1941            x vector for the matrix-vector product y = Ax.
1942 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1943 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1944 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1945            submatrix  (same for all local rows)
1946 .  d_nnz - array containing the number of block nonzeros in the various block rows
1947            in the upper triangular portion of the in diagonal portion of the local
1948            (possibly different for each block block row) or PETSC_NULL.
1949            You must leave room for the diagonal entry even if it is zero.
1950 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1951            submatrix (same for all local rows).
1952 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1953            off-diagonal portion of the local submatrix (possibly different for
1954            each block row) or PETSC_NULL.
1955 
1956    Output Parameter:
1957 .  A - the matrix
1958 
1959    Options Database Keys:
1960 .   -mat_no_unroll - uses code that does not unroll the loops in the
1961                      block calculations (much slower)
1962 .   -mat_block_size - size of the blocks to use
1963 .   -mat_mpi - use the parallel matrix data structures even on one processor
1964                (defaults to using SeqBAIJ format on one processor)
1965 
1966    Notes:
1967    The number of rows and columns must be divisible by blocksize.
1968 
1969    The user MUST specify either the local or global matrix dimensions
1970    (possibly both).
1971 
1972    If PETSC_DECIDE or  PETSC_DETERMINE is used for a particular argument on one processor
1973    than it must be used on all processors that share the object for that argument.
1974 
1975    If the *_nnz parameter is given then the *_nz parameter is ignored
1976 
1977    Storage Information:
1978    For a square global matrix we define each processor's diagonal portion
1979    to be its local rows and the corresponding columns (a square submatrix);
1980    each processor's off-diagonal portion encompasses the remainder of the
1981    local matrix (a rectangular submatrix).
1982 
1983    The user can specify preallocated storage for the diagonal part of
1984    the local submatrix with either d_nz or d_nnz (not both).  Set
1985    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1986    memory allocation.  Likewise, specify preallocated storage for the
1987    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1988 
1989    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1990    the figure below we depict these three local rows and all columns (0-11).
1991 
1992 .vb
1993            0 1 2 3 4 5 6 7 8 9 10 11
1994           -------------------
1995    row 3  |  o o o d d d o o o o o o
1996    row 4  |  o o o d d d o o o o o o
1997    row 5  |  o o o d d d o o o o o o
1998           -------------------
1999 .ve
2000 
2001    Thus, any entries in the d locations are stored in the d (diagonal)
2002    submatrix, and any entries in the o locations are stored in the
2003    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2004    MatSeqSBAIJ format and the o submatrix in MATSEQBAIJ format.
2005 
2006    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2007    plus the diagonal part of the d matrix,
2008    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2009    In general, for PDE problems in which most nonzeros are near the diagonal,
2010    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2011    or you will get TERRIBLE performance; see the users' manual chapter on
2012    matrices.
2013 
2014    Level: intermediate
2015 
2016 .keywords: matrix, block, aij, compressed row, sparse, parallel
2017 
2018 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2019 @*/
2020 
2021 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2022 {
2023   PetscErrorCode ierr;
2024   PetscMPIInt    size;
2025 
2026   PetscFunctionBegin;
2027   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2028   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2029   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2030   if (size > 1) {
2031     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2032     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2033   } else {
2034     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2035     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2036   }
2037   PetscFunctionReturn(0);
2038 }
2039 
2040 
2041 #undef __FUNCT__
2042 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2043 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2044 {
2045   Mat            mat;
2046   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2047   PetscErrorCode ierr;
2048   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2049   PetscScalar    *array;
2050 
2051   PetscFunctionBegin;
2052   *newmat       = 0;
2053   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2054   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2055   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2056   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2057   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2058   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2059 
2060   mat->factor       = matin->factor;
2061   mat->preallocated = PETSC_TRUE;
2062   mat->assembled    = PETSC_TRUE;
2063   mat->insertmode   = NOT_SET_VALUES;
2064 
2065   a = (Mat_MPISBAIJ*)mat->data;
2066   a->bs2   = oldmat->bs2;
2067   a->mbs   = oldmat->mbs;
2068   a->nbs   = oldmat->nbs;
2069   a->Mbs   = oldmat->Mbs;
2070   a->Nbs   = oldmat->Nbs;
2071 
2072 
2073   a->size         = oldmat->size;
2074   a->rank         = oldmat->rank;
2075   a->donotstash   = oldmat->donotstash;
2076   a->roworiented  = oldmat->roworiented;
2077   a->rowindices   = 0;
2078   a->rowvalues    = 0;
2079   a->getrowactive = PETSC_FALSE;
2080   a->barray       = 0;
2081   a->rstartbs    = oldmat->rstartbs;
2082   a->rendbs      = oldmat->rendbs;
2083   a->cstartbs    = oldmat->cstartbs;
2084   a->cendbs      = oldmat->cendbs;
2085 
2086   /* hash table stuff */
2087   a->ht           = 0;
2088   a->hd           = 0;
2089   a->ht_size      = 0;
2090   a->ht_flag      = oldmat->ht_flag;
2091   a->ht_fact      = oldmat->ht_fact;
2092   a->ht_total_ct  = 0;
2093   a->ht_insert_ct = 0;
2094 
2095   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2096   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2097   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2098   if (oldmat->colmap) {
2099 #if defined (PETSC_USE_CTABLE)
2100     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2101 #else
2102     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2103     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2104     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2105 #endif
2106   } else a->colmap = 0;
2107 
2108   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2109     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2110     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2111     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2112   } else a->garray = 0;
2113 
2114   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2115   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2116   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2117   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2118 
2119   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2121   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2122   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2123 
2124   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2125   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2126   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2127   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2128   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2129   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2130   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2131   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2132   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2133   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2134   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2135   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2136   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2137 
2138   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2139   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2140   a->sMvctx = oldmat->sMvctx;
2141   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2142 
2143   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2144   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2145   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2146   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2147   ierr = PetscFListDuplicate(mat->qlist,&matin->qlist);CHKERRQ(ierr);
2148   *newmat = mat;
2149   PetscFunctionReturn(0);
2150 }
2151 
2152 #include "petscsys.h"
2153 
2154 #undef __FUNCT__
2155 #define __FUNCT__ "MatLoad_MPISBAIJ"
2156 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2157 {
2158   Mat            A;
2159   PetscErrorCode ierr;
2160   PetscInt       i,nz,j,rstart,rend;
2161   PetscScalar    *vals,*buf;
2162   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2163   MPI_Status     status;
2164   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2165   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2166   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2167   PetscInt       bs=1,Mbs,mbs,extra_rows;
2168   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2169   PetscInt       dcount,kmax,k,nzcount,tmp;
2170   int            fd;
2171 
2172   PetscFunctionBegin;
2173   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2174 
2175   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2176   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2177   if (!rank) {
2178     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2179     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2180     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2181     if (header[3] < 0) {
2182       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2183     }
2184   }
2185 
2186   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2187   M = header[1]; N = header[2];
2188 
2189   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2190 
2191   /*
2192      This code adds extra rows to make sure the number of rows is
2193      divisible by the blocksize
2194   */
2195   Mbs        = M/bs;
2196   extra_rows = bs - M + bs*(Mbs);
2197   if (extra_rows == bs) extra_rows = 0;
2198   else                  Mbs++;
2199   if (extra_rows &&!rank) {
2200     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2201   }
2202 
2203   /* determine ownership of all rows */
2204   mbs        = Mbs/size + ((Mbs % size) > rank);
2205   m          = mbs*bs;
2206   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2207   browners   = rowners + size + 1;
2208   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2209   rowners[0] = 0;
2210   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2211   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2212   rstart = rowners[rank];
2213   rend   = rowners[rank+1];
2214 
2215   /* distribute row lengths to all processors */
2216   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2217   if (!rank) {
2218     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2219     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2220     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2221     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2222     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2223     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2224     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2225   } else {
2226     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2227   }
2228 
2229   if (!rank) {   /* procs[0] */
2230     /* calculate the number of nonzeros on each processor */
2231     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2232     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2233     for (i=0; i<size; i++) {
2234       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2235         procsnz[i] += rowlengths[j];
2236       }
2237     }
2238     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2239 
2240     /* determine max buffer needed and allocate it */
2241     maxnz = 0;
2242     for (i=0; i<size; i++) {
2243       maxnz = PetscMax(maxnz,procsnz[i]);
2244     }
2245     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2246 
2247     /* read in my part of the matrix column indices  */
2248     nz     = procsnz[0];
2249     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2250     mycols = ibuf;
2251     if (size == 1)  nz -= extra_rows;
2252     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2253     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2254 
2255     /* read in every ones (except the last) and ship off */
2256     for (i=1; i<size-1; i++) {
2257       nz   = procsnz[i];
2258       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2259       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2260     }
2261     /* read in the stuff for the last proc */
2262     if (size != 1) {
2263       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2264       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2265       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2266       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2267     }
2268     ierr = PetscFree(cols);CHKERRQ(ierr);
2269   } else {  /* procs[i], i>0 */
2270     /* determine buffer space needed for message */
2271     nz = 0;
2272     for (i=0; i<m; i++) {
2273       nz += locrowlens[i];
2274     }
2275     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2276     mycols = ibuf;
2277     /* receive message of column indices*/
2278     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2279     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2280     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2281   }
2282 
2283   /* loop over local rows, determining number of off diagonal entries */
2284   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2285   odlens   = dlens + (rend-rstart);
2286   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2287   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2288   masked1  = mask    + Mbs;
2289   masked2  = masked1 + Mbs;
2290   rowcount = 0; nzcount = 0;
2291   for (i=0; i<mbs; i++) {
2292     dcount  = 0;
2293     odcount = 0;
2294     for (j=0; j<bs; j++) {
2295       kmax = locrowlens[rowcount];
2296       for (k=0; k<kmax; k++) {
2297         tmp = mycols[nzcount++]/bs; /* block col. index */
2298         if (!mask[tmp]) {
2299           mask[tmp] = 1;
2300           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2301           else masked1[dcount++] = tmp; /* entry in diag portion */
2302         }
2303       }
2304       rowcount++;
2305     }
2306 
2307     dlens[i]  = dcount;  /* d_nzz[i] */
2308     odlens[i] = odcount; /* o_nzz[i] */
2309 
2310     /* zero out the mask elements we set */
2311     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2312     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2313   }
2314 
2315   /* create our matrix */
2316   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2317   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2318   ierr = MatSetType(A,type);CHKERRQ(ierr);
2319   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2320   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2321 
2322   if (!rank) {
2323     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2324     /* read in my part of the matrix numerical values  */
2325     nz = procsnz[0];
2326     vals = buf;
2327     mycols = ibuf;
2328     if (size == 1)  nz -= extra_rows;
2329     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2330     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2331 
2332     /* insert into matrix */
2333     jj      = rstart*bs;
2334     for (i=0; i<m; i++) {
2335       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2336       mycols += locrowlens[i];
2337       vals   += locrowlens[i];
2338       jj++;
2339     }
2340 
2341     /* read in other processors (except the last one) and ship out */
2342     for (i=1; i<size-1; i++) {
2343       nz   = procsnz[i];
2344       vals = buf;
2345       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2346       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2347     }
2348     /* the last proc */
2349     if (size != 1){
2350       nz   = procsnz[i] - extra_rows;
2351       vals = buf;
2352       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2353       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2354       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2355     }
2356     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2357 
2358   } else {
2359     /* receive numeric values */
2360     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2361 
2362     /* receive message of values*/
2363     vals   = buf;
2364     mycols = ibuf;
2365     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2366     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2367     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2368 
2369     /* insert into matrix */
2370     jj      = rstart*bs;
2371     for (i=0; i<m; i++) {
2372       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2373       mycols += locrowlens[i];
2374       vals   += locrowlens[i];
2375       jj++;
2376     }
2377   }
2378 
2379   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2380   ierr = PetscFree(buf);CHKERRQ(ierr);
2381   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2382   ierr = PetscFree(rowners);CHKERRQ(ierr);
2383   ierr = PetscFree(dlens);CHKERRQ(ierr);
2384   ierr = PetscFree(mask);CHKERRQ(ierr);
2385   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2386   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2387   *newmat = A;
2388   PetscFunctionReturn(0);
2389 }
2390 
2391 #undef __FUNCT__
2392 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2393 /*XXXXX@
2394    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2395 
2396    Input Parameters:
2397 .  mat  - the matrix
2398 .  fact - factor
2399 
2400    Collective on Mat
2401 
2402    Level: advanced
2403 
2404   Notes:
2405    This can also be set by the command line option: -mat_use_hash_table fact
2406 
2407 .keywords: matrix, hashtable, factor, HT
2408 
2409 .seealso: MatSetOption()
2410 @XXXXX*/
2411 
2412 
2413 #undef __FUNCT__
2414 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2415 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2416 {
2417   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2418   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2419   PetscReal      atmp;
2420   PetscReal      *work,*svalues,*rvalues;
2421   PetscErrorCode ierr;
2422   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2423   PetscMPIInt    rank,size;
2424   PetscInt       *rowners_bs,dest,count,source;
2425   PetscScalar    *va;
2426   MatScalar      *ba;
2427   MPI_Status     stat;
2428 
2429   PetscFunctionBegin;
2430   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2431   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2432 
2433   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2434   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2435 
2436   bs   = A->rmap.bs;
2437   mbs  = a->mbs;
2438   Mbs  = a->Mbs;
2439   ba   = b->a;
2440   bi   = b->i;
2441   bj   = b->j;
2442 
2443   /* find ownerships */
2444   rowners_bs = A->rmap.range;
2445 
2446   /* each proc creates an array to be distributed */
2447   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2448   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2449 
2450   /* row_max for B */
2451   if (rank != size-1){
2452     for (i=0; i<mbs; i++) {
2453       ncols = bi[1] - bi[0]; bi++;
2454       brow  = bs*i;
2455       for (j=0; j<ncols; j++){
2456         bcol = bs*(*bj);
2457         for (kcol=0; kcol<bs; kcol++){
2458           col = bcol + kcol;                 /* local col index */
2459           col += rowners_bs[rank+1];      /* global col index */
2460           for (krow=0; krow<bs; krow++){
2461             atmp = PetscAbsScalar(*ba); ba++;
2462             row = brow + krow;    /* local row index */
2463             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2464             if (work[col] < atmp) work[col] = atmp;
2465           }
2466         }
2467         bj++;
2468       }
2469     }
2470 
2471     /* send values to its owners */
2472     for (dest=rank+1; dest<size; dest++){
2473       svalues = work + rowners_bs[dest];
2474       count   = rowners_bs[dest+1]-rowners_bs[dest];
2475       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2476     }
2477   }
2478 
2479   /* receive values */
2480   if (rank){
2481     rvalues = work;
2482     count   = rowners_bs[rank+1]-rowners_bs[rank];
2483     for (source=0; source<rank; source++){
2484       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2485       /* process values */
2486       for (i=0; i<count; i++){
2487         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2488       }
2489     }
2490   }
2491 
2492   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2493   ierr = PetscFree(work);CHKERRQ(ierr);
2494   PetscFunctionReturn(0);
2495 }
2496 
2497 #undef __FUNCT__
2498 #define __FUNCT__ "MatRelax_MPISBAIJ"
2499 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2500 {
2501   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2502   PetscErrorCode ierr;
2503   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2504   PetscScalar    *x,*b,*ptr,zero=0.0;
2505   Vec            bb1;
2506 
2507   PetscFunctionBegin;
2508   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2509   if (bs > 1)
2510     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2511 
2512   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2513     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2514       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2515       its--;
2516     }
2517 
2518     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2519     while (its--){
2520 
2521       /* lower triangular part: slvec0b = - B^T*xx */
2522       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2523 
2524       /* copy xx into slvec0a */
2525       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2526       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2527       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2528       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2529 
2530       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2531 
2532       /* copy bb into slvec1a */
2533       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2534       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2535       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2536       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2537 
2538       /* set slvec1b = 0 */
2539       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2540 
2541       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2542       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2543       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2544       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2545 
2546       /* upper triangular part: bb1 = bb1 - B*x */
2547       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2548 
2549       /* local diagonal sweep */
2550       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2551     }
2552     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2553   } else {
2554     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2555   }
2556   PetscFunctionReturn(0);
2557 }
2558 
2559 #undef __FUNCT__
2560 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2561 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2562 {
2563   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2564   PetscErrorCode ierr;
2565   Vec            lvec1,bb1;
2566 
2567   PetscFunctionBegin;
2568   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2569   if (matin->rmap.bs > 1)
2570     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2571 
2572   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2573     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2574       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2575       its--;
2576     }
2577 
2578     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2579     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2580     while (its--){
2581       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2582 
2583       /* lower diagonal part: bb1 = bb - B^T*xx */
2584       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2585       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2586 
2587       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2588       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2589       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2590 
2591       /* upper diagonal part: bb1 = bb1 - B*x */
2592       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2593       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2594 
2595       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2596 
2597       /* diagonal sweep */
2598       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2599     }
2600     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2601     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2602   } else {
2603     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2604   }
2605   PetscFunctionReturn(0);
2606 }
2607 
2608