xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision 69bb7ac94e38f481a514b28cb10ff7ece56113f9)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist. Otherwise ..._MatScalar() takes matrix elements in single precision and
27    inserts them into the single precision data structure. The function MatSetValuesBlocked_MPISBAIJ()
28    converts the entries into single precision and then calls ..._MatScalar() to put them
29    into the single precision data structures.
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
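/*
   Summary of the aliasing above (descriptive only): with PETSC_USE_MAT_SINGLE the public
   MatSetValues(Blocked)_MPISBAIJ() routines are real wrapper functions (defined further
   below) that copy the PetscScalar input into a MatScalar (single precision) buffer and
   then call the ..._MatScalar() versions; without it the #defines make the two names refer
   to the same function, so no copy is ever made.
*/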
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
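/*
   The two macros below insert one scalar entry into the local diagonal block matrix A
   (SeqSBAIJ, upper triangle only) or into the off-diagonal block matrix B (SeqBAIJ).
   The target block column is located by a short binary search followed by a linear scan
   of the sorted block row; if the block is not yet present and new nonzeros are allowed,
   the row is reallocated when full and the trailing blocks are shifted up to make room
   for a zeroed block that receives the value.
*/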
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by the user is ignored.
207 */
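/*
   Caller-side sketch (illustrative only, not compiled; assumes mat is an MPISBAIJ matrix
   created elsewhere): either pass only entries with block row <= block column, or ask the
   matrix to drop lower triangular entries silently.
*/
#if 0
  PetscErrorCode ierr;
  PetscInt       i = 3,j = 5;    /* global row and column with i <= j */
  PetscScalar    v = 1.0;
  ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr);
  ierr = MatSetValues(mat,1,&i,1,&j,&v,ADD_VALUES);CHKERRQ(ierr); /* an entry with i/bs > j/bs would be dropped */
#endif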
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=mat->rmap.rstart;
218   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
219   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
220 
221   /* Some variables required by the MatSetValues_SeqSBAIJ_{A,B}_Private() macros */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc = PETSC_NULL;
238   MatScalar     *v_loc = PETSC_NULL;
239 
240   PetscFunctionBegin;
241 
242   if (!baij->donotstash){
243     if (n > baij->n_loc) {
244       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
245       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
246       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
247       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
248       baij->n_loc = n;
249     }
250     in_loc = baij->in_loc;
251     v_loc  = baij->v_loc;
252   }
253 
254   for (i=0; i<m; i++) {
255     if (im[i] < 0) continue;
256 #if defined(PETSC_USE_DEBUG)
257     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
258 #endif
259     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
260       row = im[i] - rstart_orig;              /* local row index */
261       for (j=0; j<n; j++) {
262         if (im[i]/bs > in[j]/bs){
263           if (a->ignore_ltriangular){
264             continue;    /* ignore lower triangular blocks */
265           } else {
266             SETERRQ(PETSC_ERR_USER,"Lower triangular values cannot be set for the SBAIJ format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
267           }
268         }
269         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
270           col = in[j] - cstart_orig;          /* local col index */
271           brow = row/bs; bcol = col/bs;
272           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
273           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
274           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
275           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
276         } else if (in[j] < 0) continue;
277 #if defined(PETSC_USE_DEBUG)
278         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
279 #endif
280         else {  /* off-diag entry (B) */
281           if (mat->was_assembled) {
282             if (!baij->colmap) {
283               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
284             }
285 #if defined (PETSC_USE_CTABLE)
286             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
287             col  = col - 1;
288 #else
289             col = baij->colmap[in[j]/bs] - 1;
290 #endif
291             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
292               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
293               col =  in[j];
294               /* Reinitialize the variables required by MatSetValues_SeqSBAIJ_B_Private() */
295               B = baij->B;
296               b = (Mat_SeqBAIJ*)(B)->data;
297               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
298               ba=b->a;
299             } else col += in[j]%bs;
300           } else col = in[j];
301           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
302           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
303           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
304         }
305       }
306     } else {  /* off processor entry */
307       if (!baij->donotstash) {
308         n_loc = 0;
309         for (j=0; j<n; j++){
310           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
311           in_loc[n_loc] = in[j];
312           if (roworiented) {
313             v_loc[n_loc] = v[i*n+j];
314           } else {
315             v_loc[n_loc] = v[j*m+i];
316           }
317           n_loc++;
318         }
319         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
320       }
321     }
322   }
323   PetscFunctionReturn(0);
324 }
325 
326 #undef __FUNCT__
327 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
328 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
329 {
330   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
331   const MatScalar *value;
332   MatScalar       *barray=baij->barray;
333   PetscTruth      roworiented = baij->roworiented;
334   PetscErrorCode  ierr;
335   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
336   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
337   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
338 
339   PetscFunctionBegin;
340   if(!barray) {
341     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
342     baij->barray = barray;
343   }
344 
345   if (roworiented) {
346     stepval = (n-1)*bs;
347   } else {
348     stepval = (m-1)*bs;
349   }
350   for (i=0; i<m; i++) {
351     if (im[i] < 0) continue;
352 #if defined(PETSC_USE_DEBUG)
353     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
354 #endif
355     if (im[i] >= rstart && im[i] < rend) {
356       row = im[i] - rstart;
357       for (j=0; j<n; j++) {
358         /* If n == 1 (row-oriented) or m == 1 (column-oriented) then a copy is not required */
359         if ((roworiented) && (n == 1)) {
360           barray = (MatScalar*) v + i*bs2;
361         } else if((!roworiented) && (m == 1)) {
362           barray = (MatScalar*) v + j*bs2;
363         } else { /* Here a copy is required */
364           if (roworiented) {
365             value = v + i*(stepval+bs)*bs + j*bs;
366           } else {
367             value = v + j*(stepval+bs)*bs + i*bs;
368           }
369           for (ii=0; ii<bs; ii++,value+=stepval) {
370             for (jj=0; jj<bs; jj++) {
371               *barray++  = *value++;
372             }
373           }
374           barray -=bs2;
375         }
376 
377         if (in[j] >= cstart && in[j] < cend){
378           col  = in[j] - cstart;
379           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
380         }
381         else if (in[j] < 0) continue;
382 #if defined(PETSC_USE_DEBUG)
383         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
384 #endif
385         else {
386           if (mat->was_assembled) {
387             if (!baij->colmap) {
388               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
389             }
390 
391 #if defined(PETSC_USE_DEBUG)
392 #if defined (PETSC_USE_CTABLE)
393             { PetscInt data;
394               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
395               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
396             }
397 #else
398             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
399 #endif
400 #endif
401 #if defined (PETSC_USE_CTABLE)
402             ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
403             col  = (col - 1)/bs;
404 #else
405             col = (baij->colmap[in[j]] - 1)/bs;
406 #endif
407             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
408               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
409               col =  in[j];
410             }
411           }
412           else col = in[j];
413           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
414         }
415       }
416     } else {
417       if (!baij->donotstash) {
418         if (roworiented) {
419           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
420         } else {
421           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
422         }
423       }
424     }
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatGetValues_MPISBAIJ"
431 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
432 {
433   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
434   PetscErrorCode ierr;
435   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
436   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
437 
438   PetscFunctionBegin;
439   for (i=0; i<m; i++) {
440     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
441     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
442     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
443       row = idxm[i] - bsrstart;
444       for (j=0; j<n; j++) {
445         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
446         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
447         if (idxn[j] >= bscstart && idxn[j] < bscend){
448           col = idxn[j] - bscstart;
449           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
450         } else {
451           if (!baij->colmap) {
452             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
453           }
454 #if defined (PETSC_USE_CTABLE)
455           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
456           data --;
457 #else
458           data = baij->colmap[idxn[j]/bs]-1;
459 #endif
460           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
461           else {
462             col  = data + idxn[j]%bs;
463             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
464           }
465         }
466       }
467     } else {
468       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
469     }
470   }
471  PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatNorm_MPISBAIJ"
476 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
477 {
478   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
479   PetscErrorCode ierr;
480   PetscReal      sum[2],*lnorm2;
481 
482   PetscFunctionBegin;
483   if (baij->size == 1) {
484     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
485   } else {
486     if (type == NORM_FROBENIUS) {
487       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
488       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
489       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
490       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
491       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
492       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
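      /* the off-diagonal block B is stored only once but appears twice (as B and B^T)
         in the symmetric matrix, hence ||mat||_F = sqrt(||A||_F^2 + 2*||B||_F^2) below */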
493       *norm = sqrt(sum[0] + 2*sum[1]);
494       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
495     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
496       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
497       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
498       PetscReal    *rsum,*rsum2,vabs;
499       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
500       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
501       MatScalar    *v;
502 
503       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
504       rsum2 = rsum + mat->cmap.N;
505       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
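      /* For a symmetric matrix the 1-norm equals the infinity-norm, so each stored
         entry |a_ij| is accumulated into its column sum and, for off-diagonal blocks,
         also into its row sum; the per-column sums are then summed over all processes. */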
506       /* Amat */
507       v = amat->a; jj = amat->j;
508       for (brow=0; brow<mbs; brow++) {
509         grow = bs*(rstart + brow);
510         nz = amat->i[brow+1] - amat->i[brow];
511         for (bcol=0; bcol<nz; bcol++){
512           gcol = bs*(rstart + *jj); jj++;
513           for (col=0; col<bs; col++){
514             for (row=0; row<bs; row++){
515               vabs = PetscAbsScalar(*v); v++;
516               rsum[gcol+col] += vabs;
517               /* non-diagonal block */
518               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
519             }
520           }
521         }
522       }
523       /* Bmat */
524       v = bmat->a; jj = bmat->j;
525       for (brow=0; brow<mbs; brow++) {
526         grow = bs*(rstart + brow);
527         nz = bmat->i[brow+1] - bmat->i[brow];
528         for (bcol=0; bcol<nz; bcol++){
529           gcol = bs*garray[*jj]; jj++;
530           for (col=0; col<bs; col++){
531             for (row=0; row<bs; row++){
532               vabs = PetscAbsScalar(*v); v++;
533               rsum[gcol+col] += vabs;
534               rsum[grow+row] += vabs;
535             }
536           }
537         }
538       }
539       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
540       *norm = 0.0;
541       for (col=0; col<mat->cmap.N; col++) {
542         if (rsum2[col] > *norm) *norm = rsum2[col];
543       }
544       ierr = PetscFree(rsum);CHKERRQ(ierr);
545     } else {
546       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
547     }
548   }
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
554 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
555 {
556   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
557   PetscErrorCode ierr;
558   PetscInt       nstash,reallocs;
559   InsertMode     addv;
560 
561   PetscFunctionBegin;
562   if (baij->donotstash) {
563     PetscFunctionReturn(0);
564   }
565 
566   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
567   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
568   if (addv == (ADD_VALUES|INSERT_VALUES)) {
569     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
570   }
571   mat->insertmode = addv; /* in case this processor had no cache */
572 
573   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
574   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
575   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
576   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
577   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
578   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
584 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
585 {
586   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
587   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
588   PetscErrorCode ierr;
589   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
590   PetscInt       *row,*col,other_disassembled;
591   PetscMPIInt    n;
592   PetscTruth     r1,r2,r3;
593   MatScalar      *val;
594   InsertMode     addv = mat->insertmode;
595 
596   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
597   PetscFunctionBegin;
598 
599   if (!baij->donotstash) {
600     while (1) {
601       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
602       if (!flg) break;
603 
604       for (i=0; i<n;) {
605         /* Now identify the consecutive vals belonging to the same row */
606         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
607         if (j < n) ncols = j-i;
608         else       ncols = n-i;
609         /* Now assemble all these values with a single function call */
610         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
611         i = j;
612       }
613     }
614     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
615     /* Now process the block-stash. Since its values are stashed column-oriented,
616        temporarily clear the row-oriented flags, and restore the original flags
617        after MatSetValues() */
618     r1 = baij->roworiented;
619     r2 = a->roworiented;
620     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
621     baij->roworiented = PETSC_FALSE;
622     a->roworiented    = PETSC_FALSE;
623     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
624     while (1) {
625       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
626       if (!flg) break;
627 
628       for (i=0; i<n;) {
629         /* Now identify the consecutive vals belonging to the same row */
630         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
631         if (j < n) ncols = j-i;
632         else       ncols = n-i;
633         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
634         i = j;
635       }
636     }
637     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
638     baij->roworiented = r1;
639     a->roworiented    = r2;
640     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
641   }
642 
643   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
644   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
645 
646   /* determine if any processor has disassembled, if so we must
647      also disassemble ourselves, in order that we may reassemble. */
648   /*
649      if nonzero structure of submatrix B cannot change then we know that
650      no processor disassembled thus we can skip this stuff
651   */
652   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
653     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
654     if (mat->was_assembled && !other_disassembled) {
655       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
656     }
657   }
658 
659   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
660     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
661   }
662   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
663   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
664   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
665 
666   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
667   baij->rowvalues = 0;
668 
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
675 {
676   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
677   PetscErrorCode    ierr;
678   PetscInt          bs = mat->rmap.bs;
679   PetscMPIInt       size = baij->size,rank = baij->rank;
680   PetscTruth        iascii,isdraw;
681   PetscViewer       sviewer;
682   PetscViewerFormat format;
683 
684   PetscFunctionBegin;
685   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
687   if (iascii) {
688     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
689     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
690       MatInfo info;
691       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
692       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
694               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
695               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
697       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
698       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
699       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
700       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
702       PetscFunctionReturn(0);
703     } else if (format == PETSC_VIEWER_ASCII_INFO) {
704       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
705       PetscFunctionReturn(0);
706     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
707       PetscFunctionReturn(0);
708     }
709   }
710 
711   if (isdraw) {
712     PetscDraw       draw;
713     PetscTruth isnull;
714     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
715     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
716   }
717 
718   if (size == 1) {
719     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
720     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
721   } else {
722     /* assemble the entire matrix onto first processor. */
723     Mat         A;
724     Mat_SeqSBAIJ *Aloc;
725     Mat_SeqBAIJ *Bloc;
726     PetscInt         M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
727     MatScalar   *a;
728 
729     /* Should this be the same type as mat? */
730     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
733     } else {
734       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
735     }
736     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
737     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
738     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
739 
740     /* copy over the A part */
741     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
742     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
743     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
744 
745     for (i=0; i<mbs; i++) {
746       rvals[0] = mat->rmap.rstart + bs*i;
747       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
748       for (j=ai[i]; j<ai[i+1]; j++) {
749         col = mat->cmap.rstart+aj[j]*bs;
750         for (k=0; k<bs; k++) {
751           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
752           col++; a += bs;
753         }
754       }
755     }
756     /* copy over the B part */
757     Bloc = (Mat_SeqBAIJ*)baij->B->data;
758     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
759     for (i=0; i<mbs; i++) {
760       rvals[0] = mat->rmap.rstart + bs*i;
761       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
762       for (j=ai[i]; j<ai[i+1]; j++) {
763         col = baij->garray[aj[j]]*bs;
764         for (k=0; k<bs; k++) {
765           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
766           col++; a += bs;
767         }
768       }
769     }
770     ierr = PetscFree(rvals);CHKERRQ(ierr);
771     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
772     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
773     /*
774        Everyone has to call to draw the matrix since the graphics waits are
775        synchronized across all processors that share the PetscDraw object
776     */
777     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
778     if (!rank) {
779       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
780       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
781     }
782     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
783     ierr = MatDestroy(A);CHKERRQ(ierr);
784   }
785   PetscFunctionReturn(0);
786 }
787 
788 #undef __FUNCT__
789 #define __FUNCT__ "MatView_MPISBAIJ"
790 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
791 {
792   PetscErrorCode ierr;
793   PetscTruth     iascii,isdraw,issocket,isbinary;
794 
795   PetscFunctionBegin;
796   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
797   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
798   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
800   if (iascii || isdraw || issocket || isbinary) {
801     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
802   } else {
803     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
804   }
805   PetscFunctionReturn(0);
806 }
807 
808 #undef __FUNCT__
809 #define __FUNCT__ "MatDestroy_MPISBAIJ"
810 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
811 {
812   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
813   PetscErrorCode ierr;
814 
815   PetscFunctionBegin;
816 #if defined(PETSC_USE_LOG)
817   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
818 #endif
819   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
820   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
821   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
822   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
823 #if defined (PETSC_USE_CTABLE)
824   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
825 #else
826   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
827 #endif
828   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
829   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
830   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
831   if (baij->slvec0) {
832     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
833     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
834   }
835   if (baij->slvec1) {
836     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
837     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
838     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
839   }
840   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
841   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
842   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
843   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
844 #if defined(PETSC_USE_MAT_SINGLE)
845   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
846 #endif
847   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
848   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
849   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
850   ierr = PetscFree(baij);CHKERRQ(ierr);
851 
852   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
853   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
856   PetscFunctionReturn(0);
857 }
858 
859 #undef __FUNCT__
860 #define __FUNCT__ "MatMult_MPISBAIJ"
861 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
862 {
863   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
864   PetscErrorCode ierr;
865   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
866   PetscScalar    *x,*from,zero=0.0;
867 
868   PetscFunctionBegin;
869   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
870   if (nt != A->cmap.n) {
871     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
872   }
873 
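  /* y = A*x using only the locally stored upper triangular blocks (roughly):
       slvec1a <- A_diag*x,   slvec0b <- B^T*x,   slvec0a <- x;
     the sMvctx scatter then adds the B^T contributions into the owning processes
     and gathers the needed ghost values of x into slvec1b, and the final multadd
     computes yy = B*slvec1b + slvec1a. */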
874   /* diagonal part */
875   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
876   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
877 
878   /* subdiagonal part */
879   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
880   CHKMEMQ;
881   /* copy x into the vec slvec0 */
882   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
883   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
884   CHKMEMQ;
885   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
886   CHKMEMQ;
887   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
888 
889   CHKMEMQ;
890   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
891   CHKMEMQ;
892   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
893   CHKMEMQ;
894   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
895   CHKMEMQ;
896   /* superdiagonal part */
897   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
898   CHKMEMQ;
899   PetscFunctionReturn(0);
900 }
901 
902 #undef __FUNCT__
903 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
904 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
905 {
906   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
907   PetscErrorCode ierr;
908   PetscInt       nt;
909 
910   PetscFunctionBegin;
911   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
912   if (nt != A->cmap.n) {
913     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
914   }
915   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
916   if (nt != A->rmap.N) {
917     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
918   }
919 
920   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
921   /* do diagonal part */
922   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
923   /* do superdiagonal part */
924   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
925   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
926   /* do subdiagonal part */
927   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
928   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
929   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
930 
931   PetscFunctionReturn(0);
932 }
933 
934 #undef __FUNCT__
935 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
936 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
937 {
938   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
939   PetscErrorCode ierr;
940   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
941   PetscScalar    *x,*from,zero=0.0;
942 
943   PetscFunctionBegin;
944   /*
945   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
946   PetscSynchronizedFlush(A->comm);
947   */
948   /* diagonal part */
949   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
950   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
951 
952   /* subdiagonal part */
953   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
954 
955   /* copy x into the vec slvec0 */
956   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
957   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
958   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
959   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
960 
961   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
962   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
963   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
964 
965   /* superdiagonal part */
966   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
967 
968   PetscFunctionReturn(0);
969 }
970 
971 #undef __FUNCT__
972 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
973 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
974 {
975   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
976   PetscErrorCode ierr;
977 
978   PetscFunctionBegin;
979   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
980   /* do diagonal part */
981   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
982   /* do superdiagonal part */
983   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
984   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
985 
986   /* do subdiagonal part */
987   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
988   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
989   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
990 
991   PetscFunctionReturn(0);
992 }
993 
994 /*
995   This only works correctly for square matrices where the subblock A->A is the
996    diagonal block
997 */
998 #undef __FUNCT__
999 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1000 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1001 {
1002   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1003   PetscErrorCode ierr;
1004 
1005   PetscFunctionBegin;
1006   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1007   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1008   PetscFunctionReturn(0);
1009 }
1010 
1011 #undef __FUNCT__
1012 #define __FUNCT__ "MatScale_MPISBAIJ"
1013 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1014 {
1015   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1016   PetscErrorCode ierr;
1017 
1018   PetscFunctionBegin;
1019   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1020   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1021   PetscFunctionReturn(0);
1022 }
1023 
1024 #undef __FUNCT__
1025 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1026 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1027 {
1028   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1029   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1030   PetscErrorCode ierr;
1031   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1032   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1033   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1034 
1035   PetscFunctionBegin;
1036   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1037   mat->getrowactive = PETSC_TRUE;
1038 
1039   if (!mat->rowvalues && (idx || v)) {
1040     /*
1041         allocate enough space to hold information from the longest row.
1042     */
1043     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1044     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1045     PetscInt     max = 1,mbs = mat->mbs,tmp;
1046     for (i=0; i<mbs; i++) {
1047       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1048       if (max < tmp) { max = tmp; }
1049     }
1050     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1051     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1052   }
1053 
1054   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows");
1055   lrow = row - brstart;  /* local row index */
1056 
1057   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1058   if (!v)   {pvA = 0; pvB = 0;}
1059   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1060   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1061   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1062   nztot = nzA + nzB;
1063 
1064   cmap  = mat->garray;
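  /* Merge the two local rows into global column numbering: B entries whose global
     block column lies before the diagonal block come first, then all A entries,
     then the remaining B entries; garray maps local B block columns to global ones. */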
1065   if (v  || idx) {
1066     if (nztot) {
1067       /* Sort by increasing column numbers, assuming A and B already sorted */
1068       PetscInt imark = -1;
1069       if (v) {
1070         *v = v_p = mat->rowvalues;
1071         for (i=0; i<nzB; i++) {
1072           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1073           else break;
1074         }
1075         imark = i;
1076         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1077         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1078       }
1079       if (idx) {
1080         *idx = idx_p = mat->rowindices;
1081         if (imark > -1) {
1082           for (i=0; i<imark; i++) {
1083             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1084           }
1085         } else {
1086           for (i=0; i<nzB; i++) {
1087             if (cmap[cworkB[i]/bs] < cstart)
1088               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1089             else break;
1090           }
1091           imark = i;
1092         }
1093         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1094         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1095       }
1096     } else {
1097       if (idx) *idx = 0;
1098       if (v)   *v   = 0;
1099     }
1100   }
1101   *nz = nztot;
1102   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1103   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1104   PetscFunctionReturn(0);
1105 }
1106 
1107 #undef __FUNCT__
1108 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1109 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1110 {
1111   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1112 
1113   PetscFunctionBegin;
1114   if (!baij->getrowactive) {
1115     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1116   }
1117   baij->getrowactive = PETSC_FALSE;
1118   PetscFunctionReturn(0);
1119 }
1120 
1121 #undef __FUNCT__
1122 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1123 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1124 {
1125   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1126   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1127 
1128   PetscFunctionBegin;
1129   aA->getrow_utriangular = PETSC_TRUE;
1130   PetscFunctionReturn(0);
1131 }
1132 #undef __FUNCT__
1133 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1134 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1135 {
1136   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1137   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1138 
1139   PetscFunctionBegin;
1140   aA->getrow_utriangular = PETSC_FALSE;
1141   PetscFunctionReturn(0);
1142 }
1143 
1144 #undef __FUNCT__
1145 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1146 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1147 {
1148   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1149   PetscErrorCode ierr;
1150 
1151   PetscFunctionBegin;
1152   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1153   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1154   PetscFunctionReturn(0);
1155 }
1156 
1157 #undef __FUNCT__
1158 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1159 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1160 {
1161   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1162   PetscErrorCode ierr;
1163 
1164   PetscFunctionBegin;
1165   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1166   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1167   PetscFunctionReturn(0);
1168 }
1169 
1170 #undef __FUNCT__
1171 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1172 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1173 {
1174   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1175   PetscErrorCode ierr;
1176 
1177   PetscFunctionBegin;
1178   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1179   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1180   PetscFunctionReturn(0);
1181 }
1182 
1183 #undef __FUNCT__
1184 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1185 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1186 {
1187   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1188   Mat            A = a->A,B = a->B;
1189   PetscErrorCode ierr;
1190   PetscReal      isend[5],irecv[5];
1191 
1192   PetscFunctionBegin;
1193   info->block_size     = (PetscReal)matin->rmap.bs;
1194   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1195   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1196   isend[3] = info->memory;  isend[4] = info->mallocs;
1197   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1198   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1199   isend[3] += info->memory;  isend[4] += info->mallocs;
1200   if (flag == MAT_LOCAL) {
1201     info->nz_used      = isend[0];
1202     info->nz_allocated = isend[1];
1203     info->nz_unneeded  = isend[2];
1204     info->memory       = isend[3];
1205     info->mallocs      = isend[4];
1206   } else if (flag == MAT_GLOBAL_MAX) {
1207     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1208     info->nz_used      = irecv[0];
1209     info->nz_allocated = irecv[1];
1210     info->nz_unneeded  = irecv[2];
1211     info->memory       = irecv[3];
1212     info->mallocs      = irecv[4];
1213   } else if (flag == MAT_GLOBAL_SUM) {
1214     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1215     info->nz_used      = irecv[0];
1216     info->nz_allocated = irecv[1];
1217     info->nz_unneeded  = irecv[2];
1218     info->memory       = irecv[3];
1219     info->mallocs      = irecv[4];
1220   } else {
1221     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1222   }
1223   info->rows_global       = (PetscReal)A->rmap.N;
1224   info->columns_global    = (PetscReal)A->cmap.N;
1225   info->rows_local        = (PetscReal)A->rmap.N;
1226   info->columns_local     = (PetscReal)A->cmap.N;
1227   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1228   info->fill_ratio_needed = 0;
1229   info->factor_mallocs    = 0;
1230   PetscFunctionReturn(0);
1231 }
1232 
1233 #undef __FUNCT__
1234 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1235 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1236 {
1237   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1238   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1239   PetscErrorCode ierr;
1240 
1241   PetscFunctionBegin;
1242   switch (op) {
1243   case MAT_NO_NEW_NONZERO_LOCATIONS:
1244   case MAT_YES_NEW_NONZERO_LOCATIONS:
1245   case MAT_COLUMNS_UNSORTED:
1246   case MAT_COLUMNS_SORTED:
1247   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1248   case MAT_KEEP_ZEROED_ROWS:
1249   case MAT_NEW_NONZERO_LOCATION_ERR:
1250     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1251     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1252     break;
1253   case MAT_ROW_ORIENTED:
1254     a->roworiented = PETSC_TRUE;
1255     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1256     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1257     break;
1258   case MAT_ROWS_SORTED:
1259   case MAT_ROWS_UNSORTED:
1260   case MAT_YES_NEW_DIAGONALS:
1261     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
1262     break;
1263   case MAT_COLUMN_ORIENTED:
1264     a->roworiented = PETSC_FALSE;
1265     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1266     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1267     break;
1268   case MAT_IGNORE_OFF_PROC_ENTRIES:
1269     a->donotstash = PETSC_TRUE;
1270     break;
1271   case MAT_NO_NEW_DIAGONALS:
1272     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1273   case MAT_USE_HASH_TABLE:
1274     a->ht_flag = PETSC_TRUE;
1275     break;
1276   case MAT_NOT_SYMMETRIC:
1277   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1278   case MAT_HERMITIAN:
1279     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1280   case MAT_SYMMETRIC:
1281   case MAT_STRUCTURALLY_SYMMETRIC:
1282   case MAT_NOT_HERMITIAN:
1283   case MAT_SYMMETRY_ETERNAL:
1284   case MAT_NOT_SYMMETRY_ETERNAL:
1285     break;
1286   case MAT_IGNORE_LOWER_TRIANGULAR:
1287     aA->ignore_ltriangular = PETSC_TRUE;
1288     break;
1289   case MAT_ERROR_LOWER_TRIANGULAR:
1290     aA->ignore_ltriangular = PETSC_FALSE;
1291     break;
1292   case MAT_GETROW_UPPERTRIANGULAR:
1293     aA->getrow_utriangular = PETSC_TRUE;
1294     break;
1295   default:
1296     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1297   }
1298   PetscFunctionReturn(0);
1299 }
1300 
1301 #undef __FUNCT__
1302 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1303 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1304 {
1305   PetscErrorCode ierr;
1306   PetscFunctionBegin;
1307   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1308   PetscFunctionReturn(0);
1309 }
1310 
1311 #undef __FUNCT__
1312 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1313 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1314 {
1315   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1316   Mat            a=baij->A, b=baij->B;
1317   PetscErrorCode ierr;
1318   PetscInt       nv,m,n;
1319   PetscTruth     flg;
1320 
1321   PetscFunctionBegin;
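  /* Only the symmetric scaling D*A*D keeps the matrix symmetric, so for the SBAIJ
     format the left and right scaling vectors must coincide. */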
1322   if (ll != rr){
1323     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1324     if (!flg)
1325       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1326   }
1327   if (!ll) PetscFunctionReturn(0);
1328 
1329   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1330   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %d %d must be the same",m,n);
1331 
1332   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1333   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vector non-conforming local size");
1334 
1335   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1336 
1337   /* left diagonalscale the off-diagonal part */
1338   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1339 
1340   /* scale the diagonal part */
1341   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1342 
1343   /* right diagonalscale the off-diagonal part */
1344   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1345   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1346   PetscFunctionReturn(0);
1347 }
1348 
1349 #undef __FUNCT__
1350 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1351 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1352 {
1353   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1354   MPI_Comm          comm = A->comm;
1355   static PetscTruth called = PETSC_FALSE;
1356   PetscErrorCode    ierr;
1357 
1358   PetscFunctionBegin;
1359   if (!a->rank) {
1360     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1361   }
1362   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1363   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1364   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1365   PetscFunctionReturn(0);
1366 }
1367 
1368 #undef __FUNCT__
1369 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1370 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1371 {
1372   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1373   PetscErrorCode ierr;
1374 
1375   PetscFunctionBegin;
1376   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1377   PetscFunctionReturn(0);
1378 }
1379 
1380 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1381 
1382 #undef __FUNCT__
1383 #define __FUNCT__ "MatEqual_MPISBAIJ"
1384 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1385 {
1386   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1387   Mat            a,b,c,d;
1388   PetscTruth     flg;
1389   PetscErrorCode ierr;
1390 
1391   PetscFunctionBegin;
1392   a = matA->A; b = matA->B;
1393   c = matB->A; d = matB->B;
1394 
1395   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1396   if (flg) {
1397     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1398   }
1399   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1400   PetscFunctionReturn(0);
1401 }
1402 
1403 #undef __FUNCT__
1404 #define __FUNCT__ "MatCopy_MPISBAIJ"
1405 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1406 {
1407   PetscErrorCode ierr;
1408   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1409   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1410 
1411   PetscFunctionBegin;
1412   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1413   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1414     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1415     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1416     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1417   } else {
1418     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1419     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1420   }
1421   PetscFunctionReturn(0);
1422 }
1423 
1424 #undef __FUNCT__
1425 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1426 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1427 {
1428   PetscErrorCode ierr;
1429 
1430   PetscFunctionBegin;
1431   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1432   PetscFunctionReturn(0);
1433 }
1434 
1435 #include "petscblaslapack.h"
1436 #undef __FUNCT__
1437 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1438 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1439 {
1440   PetscErrorCode ierr;
1441   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1442   PetscBLASInt   bnz,one=1;
1443   Mat_SeqSBAIJ   *xa,*ya;
1444   Mat_SeqBAIJ    *xb,*yb;
1445 
1446   PetscFunctionBegin;
1447   if (str == SAME_NONZERO_PATTERN) {
1448     PetscScalar alpha = a;
1449     xa = (Mat_SeqSBAIJ *)xx->A->data;
1450     ya = (Mat_SeqSBAIJ *)yy->A->data;
1451     bnz = (PetscBLASInt)xa->nz;
1452     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1453     xb = (Mat_SeqBAIJ *)xx->B->data;
1454     yb = (Mat_SeqBAIJ *)yy->B->data;
1455     bnz = (PetscBLASInt)xb->nz;
1456     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1457   } else {
1458     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1459     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1460     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1461   }
1462   PetscFunctionReturn(0);
1463 }
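
/*
   Usage sketch for the routine above (illustrative only): computing Y = Y + a*X
   through the public MatAXPY() interface.  When both MPISBAIJ matrices share
   the same nonzero pattern the fast BLAS path above is taken; otherwise the
   basic row-by-row fallback is used.

      PetscScalar a = 2.0;
      ierr = MatAXPY(Y,a,X,SAME_NONZERO_PATTERN);CHKERRQ(ierr);
      ierr = MatAXPY(Y,a,X,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr);
*/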
1464 
1465 #undef __FUNCT__
1466 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1467 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1468 {
1469   PetscErrorCode ierr;
1470   PetscInt       i;
1471   PetscTruth     flg;
1472 
1473   PetscFunctionBegin;
1474   for (i=0; i<n; i++) {
1475     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1476     if (!flg) {
1477       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1478     }
1479   }
1480   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1481   PetscFunctionReturn(0);
1482 }
1483 
1484 
1485 /* -------------------------------------------------------------------*/
1486 static struct _MatOps MatOps_Values = {
1487        MatSetValues_MPISBAIJ,
1488        MatGetRow_MPISBAIJ,
1489        MatRestoreRow_MPISBAIJ,
1490        MatMult_MPISBAIJ,
1491 /* 4*/ MatMultAdd_MPISBAIJ,
1492        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1493        MatMultAdd_MPISBAIJ,
1494        0,
1495        0,
1496        0,
1497 /*10*/ 0,
1498        0,
1499        0,
1500        MatRelax_MPISBAIJ,
1501        MatTranspose_MPISBAIJ,
1502 /*15*/ MatGetInfo_MPISBAIJ,
1503        MatEqual_MPISBAIJ,
1504        MatGetDiagonal_MPISBAIJ,
1505        MatDiagonalScale_MPISBAIJ,
1506        MatNorm_MPISBAIJ,
1507 /*20*/ MatAssemblyBegin_MPISBAIJ,
1508        MatAssemblyEnd_MPISBAIJ,
1509        0,
1510        MatSetOption_MPISBAIJ,
1511        MatZeroEntries_MPISBAIJ,
1512 /*25*/ 0,
1513        0,
1514        0,
1515        0,
1516        0,
1517 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1518        0,
1519        0,
1520        0,
1521        0,
1522 /*35*/ MatDuplicate_MPISBAIJ,
1523        0,
1524        0,
1525        0,
1526        0,
1527 /*40*/ MatAXPY_MPISBAIJ,
1528        MatGetSubMatrices_MPISBAIJ,
1529        MatIncreaseOverlap_MPISBAIJ,
1530        MatGetValues_MPISBAIJ,
1531        MatCopy_MPISBAIJ,
1532 /*45*/ MatPrintHelp_MPISBAIJ,
1533        MatScale_MPISBAIJ,
1534        0,
1535        0,
1536        0,
1537 /*50*/ 0,
1538        0,
1539        0,
1540        0,
1541        0,
1542 /*55*/ 0,
1543        0,
1544        MatSetUnfactored_MPISBAIJ,
1545        0,
1546        MatSetValuesBlocked_MPISBAIJ,
1547 /*60*/ 0,
1548        0,
1549        0,
1550        0,
1551        0,
1552 /*65*/ 0,
1553        0,
1554        0,
1555        0,
1556        0,
1557 /*70*/ MatGetRowMax_MPISBAIJ,
1558        0,
1559        0,
1560        0,
1561        0,
1562 /*75*/ 0,
1563        0,
1564        0,
1565        0,
1566        0,
1567 /*80*/ 0,
1568        0,
1569        0,
1570        0,
1571        MatLoad_MPISBAIJ,
1572 /*85*/ 0,
1573        0,
1574        0,
1575        0,
1576        0,
1577 /*90*/ 0,
1578        0,
1579        0,
1580        0,
1581        0,
1582 /*95*/ 0,
1583        0,
1584        0,
1585        0,
1586        0,
1587 /*100*/0,
1588        0,
1589        0,
1590        0,
1591        0,
1592 /*105*/0,
1593        MatRealPart_MPISBAIJ,
1594        MatImaginaryPart_MPISBAIJ,
1595        MatGetRowUpperTriangular_MPISBAIJ,
1596        MatRestoreRowUpperTriangular_MPISBAIJ
1597 };
1598 
1599 
1600 EXTERN_C_BEGIN
1601 #undef __FUNCT__
1602 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1603 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1604 {
1605   PetscFunctionBegin;
1606   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1607   *iscopy = PETSC_FALSE;
1608   PetscFunctionReturn(0);
1609 }
1610 EXTERN_C_END
1611 
1612 EXTERN_C_BEGIN
1613 #undef __FUNCT__
1614 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1615 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1616 {
1617   Mat_MPISBAIJ   *b;
1618   PetscErrorCode ierr;
1619   PetscInt       i,mbs,Mbs;
1620 
1621   PetscFunctionBegin;
1622   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1623 
1624   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1625   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1626   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1627   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1628   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1629 
1630   B->rmap.bs = B->cmap.bs = bs;
1631   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1632   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1633 
1634   if (d_nnz) {
1635     for (i=0; i<B->rmap.n/bs; i++) {
1636       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1637     }
1638   }
1639   if (o_nnz) {
1640     for (i=0; i<B->rmap.n/bs; i++) {
1641       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1642     }
1643   }
1644   B->preallocated = PETSC_TRUE;
1645 
1646   b   = (Mat_MPISBAIJ*)B->data;
1647   mbs = B->rmap.n/bs;
1648   Mbs = B->rmap.N/bs;
1649   if (mbs*bs != B->rmap.n) {
1650     SETERRQ2(PETSC_ERR_ARG_SIZ,"Number of local rows %D must be divisible by blocksize %D",B->rmap.n,bs);
1651   }
1652 
1653   B->rmap.bs  = bs;
1654   b->bs2 = bs*bs;
1655   b->mbs = mbs;
1656   b->nbs = mbs;
1657   b->Mbs = Mbs;
1658   b->Nbs = Mbs;
1659 
1660   for (i=0; i<=b->size; i++) {
1661     b->rangebs[i] = B->rmap.range[i]/bs;
1662   }
1663   b->rstartbs = B->rmap.rstart/bs;
1664   b->rendbs   = B->rmap.rend/bs;
1665 
1666   b->cstartbs = B->cmap.rstart/bs;
1667   b->cendbs   = B->cmap.rend/bs;
1668 
1669   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1670   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1671   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1672   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1673   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1674 
1675   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1676   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1677   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1678   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1679   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1680 
1681   /* build cache for off array entries formed */
1682   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1683 
1684   PetscFunctionReturn(0);
1685 }
1686 EXTERN_C_END
1687 
1688 /*MC
1689    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1690    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1691 
1692    Options Database Keys:
1693 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1694 
1695   Level: beginner
1696 
1697 .seealso: MatCreateMPISBAIJ
1698 M*/
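
/*
   Usage sketch (illustrative only; error checking abbreviated): selecting this
   type either in code or from the options database.  The sizes M, N, bs and
   the nz counts below are hypothetical.

      Mat A;
      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);     then run with -mat_type mpisbaij
      ierr = MatMPISBAIJSetPreallocation(A,bs,d_nz,PETSC_NULL,o_nz,PETSC_NULL);CHKERRQ(ierr);
*/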
1699 
1700 EXTERN_C_BEGIN
1701 #undef __FUNCT__
1702 #define __FUNCT__ "MatCreate_MPISBAIJ"
1703 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1704 {
1705   Mat_MPISBAIJ   *b;
1706   PetscErrorCode ierr;
1707   PetscTruth     flg;
1708 
1709   PetscFunctionBegin;
1710 
1711   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1712   B->data = (void*)b;
1713   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1714 
1715   B->ops->destroy    = MatDestroy_MPISBAIJ;
1716   B->ops->view       = MatView_MPISBAIJ;
1717   B->mapping    = 0;
1718   B->factor     = 0;
1719   B->assembled  = PETSC_FALSE;
1720 
1721   B->insertmode = NOT_SET_VALUES;
1722   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1723   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1724 
1725   /* build local table of row and column ownerships */
1726   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1727 
1728   /* build cache for off array entries formed */
1729   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1730   b->donotstash  = PETSC_FALSE;
1731   b->colmap      = PETSC_NULL;
1732   b->garray      = PETSC_NULL;
1733   b->roworiented = PETSC_TRUE;
1734 
1735 #if defined(PETSC_USE_MAT_SINGLE)
1736   /* stuff for MatSetValues_XXX in single precision */
1737   b->setvalueslen     = 0;
1738   b->setvaluescopy    = PETSC_NULL;
1739 #endif
1740 
1741   /* stuff used in block assembly */
1742   b->barray       = 0;
1743 
1744   /* stuff used for matrix vector multiply */
1745   b->lvec         = 0;
1746   b->Mvctx        = 0;
1747   b->slvec0       = 0;
1748   b->slvec0b      = 0;
1749   b->slvec1       = 0;
1750   b->slvec1a      = 0;
1751   b->slvec1b      = 0;
1752   b->sMvctx       = 0;
1753 
1754   /* stuff for MatGetRow() */
1755   b->rowindices   = 0;
1756   b->rowvalues    = 0;
1757   b->getrowactive = PETSC_FALSE;
1758 
1759   /* hash table stuff */
1760   b->ht           = 0;
1761   b->hd           = 0;
1762   b->ht_size      = 0;
1763   b->ht_flag      = PETSC_FALSE;
1764   b->ht_fact      = 0;
1765   b->ht_total_ct  = 0;
1766   b->ht_insert_ct = 0;
1767 
1768   b->in_loc       = 0;
1769   b->v_loc        = 0;
1770   b->n_loc        = 0;
1771   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1772   if (flg) {
1773     PetscReal fact = 1.39;
1774     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1775     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1776     if (fact <= 1.0) fact = 1.39;
1777     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1778     ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1779   }
1780   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1781                                      "MatStoreValues_MPISBAIJ",
1782                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1783   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1784                                      "MatRetrieveValues_MPISBAIJ",
1785                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1786   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1787                                      "MatGetDiagonalBlock_MPISBAIJ",
1788                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1789   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1790                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1791                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1792   B->symmetric                  = PETSC_TRUE;
1793   B->structurally_symmetric     = PETSC_TRUE;
1794   B->symmetric_set              = PETSC_TRUE;
1795   B->structurally_symmetric_set = PETSC_TRUE;
1796   PetscFunctionReturn(0);
1797 }
1798 EXTERN_C_END
1799 
1800 /*MC
1801    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1802 
1803    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1804    and MATMPISBAIJ otherwise.
1805 
1806    Options Database Keys:
1807 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1808 
1809   Level: beginner
1810 
1811 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1812 M*/
1813 
1814 EXTERN_C_BEGIN
1815 #undef __FUNCT__
1816 #define __FUNCT__ "MatCreate_SBAIJ"
1817 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1818 {
1819   PetscErrorCode ierr;
1820   PetscMPIInt    size;
1821 
1822   PetscFunctionBegin;
1823   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1824   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1825   if (size == 1) {
1826     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1827   } else {
1828     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1829   }
1830   PetscFunctionReturn(0);
1831 }
1832 EXTERN_C_END
1833 
1834 #undef __FUNCT__
1835 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1836 /*@C
1837    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1838    the user should preallocate the matrix storage by setting the parameters
1839    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1840    performance can be increased by more than a factor of 50.
1841 
1842    Collective on Mat
1843 
1844    Input Parameters:
1845 +  A - the matrix
1846 .  bs   - size of block
1847 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1848            submatrix  (same for all local rows)
1849 .  d_nnz - array containing the number of block nonzeros in the various block rows
1850            in the upper triangular and diagonal part of the diagonal portion of the local
1851            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave room
1852            for the diagonal entry even if it is zero.
1853 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1854            submatrix (same for all local rows).
1855 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1856            off-diagonal portion of the local submatrix (possibly different for
1857            each block row) or PETSC_NULL.
1858 
1859 
1860    Options Database Keys:
1861 .   -mat_no_unroll - uses code that does not unroll the loops in the
1862                      block calculations (much slower)
1863 .   -mat_block_size - size of the blocks to use
1864 
1865    Notes:
1866 
1867    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1868    then it must be used on all processors that share the object for that argument.
1869 
1870    If the *_nnz parameter is given then the *_nz parameter is ignored
1871 
1872    Storage Information:
1873    For a square global matrix we define each processor's diagonal portion
1874    to be its local rows and the corresponding columns (a square submatrix);
1875    each processor's off-diagonal portion encompasses the remainder of the
1876    local matrix (a rectangular submatrix).
1877 
1878    The user can specify preallocated storage for the diagonal part of
1879    the local submatrix with either d_nz or d_nnz (not both).  Set
1880    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1881    memory allocation.  Likewise, specify preallocated storage for the
1882    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1883 
1884    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1885    the figure below we depict these three local rows and all columns (0-11).
1886 
1887 .vb
1888            0 1 2 3 4 5 6 7 8 9 10 11
1889           -------------------
1890    row 3  |  o o o d d d o o o o o o
1891    row 4  |  o o o d d d o o o o o o
1892    row 5  |  o o o d d d o o o o o o
1893           -------------------
1894 .ve
1895 
1896    Thus, any entries in the d locations are stored in the d (diagonal)
1897    submatrix, and any entries in the o locations are stored in the
1898    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1899    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1900 
1901    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1902    plus the diagonal part of the d matrix,
1903    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1904    In general, for PDE problems in which most nonzeros are near the diagonal,
1905    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1906    or you will get TERRIBLE performance; see the users' manual chapter on
1907    matrices.
1908 
1909    Level: intermediate
1910 
1911 .keywords: matrix, block, aij, compressed row, sparse, parallel
1912 
1913 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1914 @*/
1915 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1916 {
1917   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1918 
1919   PetscFunctionBegin;
1920   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1921   if (f) {
1922     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1923   }
1924   PetscFunctionReturn(0);
1925 }
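
/*
   Usage sketch for MatMPISBAIJSetPreallocation() (illustrative only; the block
   size, local sizes, and nnz counts are made-up values, and each process would
   pass its own d_nnz/o_nnz arrays for its own block rows):

      Mat      A;
      PetscInt bs = 2, mloc = 3*bs;                      three local block rows
      PetscInt d_nnz[3] = {2,1,1}, o_nnz[3] = {1,1,0};

      ierr = MatCreate(PETSC_COMM_WORLD,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,mloc,mloc,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
*/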
1926 
1927 #undef __FUNCT__
1928 #define __FUNCT__ "MatCreateMPISBAIJ"
1929 /*@C
1930    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1931    (block compressed row).  For good matrix assembly performance
1932    the user should preallocate the matrix storage by setting the parameters
1933    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1934    performance can be increased by more than a factor of 50.
1935 
1936    Collective on MPI_Comm
1937 
1938    Input Parameters:
1939 +  comm - MPI communicator
1940 .  bs   - size of block
1941 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1942            This value should be the same as the local size used in creating the
1943            y vector for the matrix-vector product y = Ax.
1944 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1945            This value should be the same as the local size used in creating the
1946            x vector for the matrix-vector product y = Ax.
1947 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1948 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1949 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1950            submatrix  (same for all local rows)
1951 .  d_nnz - array containing the number of block nonzeros in the various block rows
1952            in the upper triangular portion of the diagonal portion of the local
1953            submatrix (possibly different for each block row) or PETSC_NULL.
1954            You must leave room for the diagonal entry even if it is zero.
1955 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1956            submatrix (same for all local rows).
1957 -  o_nnz - array containing the number of nonzeros in the various block rows of the
1958            off-diagonal portion of the local submatrix (possibly different for
1959            each block row) or PETSC_NULL.
1960 
1961    Output Parameter:
1962 .  A - the matrix
1963 
1964    Options Database Keys:
1965 .   -mat_no_unroll - uses code that does not unroll the loops in the
1966                      block calculations (much slower)
1967 .   -mat_block_size - size of the blocks to use
1968 .   -mat_mpi - use the parallel matrix data structures even on one processor
1969                (defaults to using SeqSBAIJ format on one processor)
1970 
1971    Notes:
1972    The number of rows and columns must be divisible by blocksize.
1973 
1974    The user MUST specify either the local or global matrix dimensions
1975    (possibly both).
1976 
1977    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1978    then it must be used on all processors that share the object for that argument.
1979 
1980    If the *_nnz parameter is given then the *_nz parameter is ignored
1981 
1982    Storage Information:
1983    For a square global matrix we define each processor's diagonal portion
1984    to be its local rows and the corresponding columns (a square submatrix);
1985    each processor's off-diagonal portion encompasses the remainder of the
1986    local matrix (a rectangular submatrix).
1987 
1988    The user can specify preallocated storage for the diagonal part of
1989    the local submatrix with either d_nz or d_nnz (not both).  Set
1990    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1991    memory allocation.  Likewise, specify preallocated storage for the
1992    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1993 
1994    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1995    the figure below we depict these three local rows and all columns (0-11).
1996 
1997 .vb
1998            0 1 2 3 4 5 6 7 8 9 10 11
1999           -------------------
2000    row 3  |  o o o d d d o o o o o o
2001    row 4  |  o o o d d d o o o o o o
2002    row 5  |  o o o d d d o o o o o o
2003           -------------------
2004 .ve
2005 
2006    Thus, any entries in the d locations are stored in the d (diagonal)
2007    submatrix, and any entries in the o locations are stored in the
2008    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2009    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2010 
2011    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2012    plus the diagonal part of the d matrix,
2013    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2014    In general, for PDE problems in which most nonzeros are near the diagonal,
2015    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2016    or you will get TERRIBLE performance; see the users' manual chapter on
2017    matrices.
2018 
2019    Level: intermediate
2020 
2021 .keywords: matrix, block, aij, compressed row, sparse, parallel
2022 
2023 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2024 @*/
2025 
2026 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2027 {
2028   PetscErrorCode ierr;
2029   PetscMPIInt    size;
2030 
2031   PetscFunctionBegin;
2032   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2033   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2034   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2035   if (size > 1) {
2036     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2037     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2038   } else {
2039     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2040     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2041   }
2042   PetscFunctionReturn(0);
2043 }
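
/*
   Usage sketch for MatCreateMPISBAIJ() (illustrative only; the dimensions and
   preallocation figures are made up).  Only entries in the upper triangular
   part, including the block diagonal, need to be inserted.

      Mat A;
      ierr = MatCreateMPISBAIJ(PETSC_COMM_WORLD,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                               5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      ...insert upper-triangular entries with MatSetValues()/MatSetValuesBlocked()...
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/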
2044 
2045 
2046 #undef __FUNCT__
2047 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2048 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2049 {
2050   Mat            mat;
2051   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2052   PetscErrorCode ierr;
2053   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2054   PetscScalar    *array;
2055 
2056   PetscFunctionBegin;
2057   *newmat       = 0;
2058   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2059   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2060   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2061   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2062   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2063   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2064 
2065   mat->factor       = matin->factor;
2066   mat->preallocated = PETSC_TRUE;
2067   mat->assembled    = PETSC_TRUE;
2068   mat->insertmode   = NOT_SET_VALUES;
2069 
2070   a = (Mat_MPISBAIJ*)mat->data;
2071   a->bs2   = oldmat->bs2;
2072   a->mbs   = oldmat->mbs;
2073   a->nbs   = oldmat->nbs;
2074   a->Mbs   = oldmat->Mbs;
2075   a->Nbs   = oldmat->Nbs;
2076 
2077 
2078   a->size         = oldmat->size;
2079   a->rank         = oldmat->rank;
2080   a->donotstash   = oldmat->donotstash;
2081   a->roworiented  = oldmat->roworiented;
2082   a->rowindices   = 0;
2083   a->rowvalues    = 0;
2084   a->getrowactive = PETSC_FALSE;
2085   a->barray       = 0;
2086   a->rstartbs    = oldmat->rstartbs;
2087   a->rendbs      = oldmat->rendbs;
2088   a->cstartbs    = oldmat->cstartbs;
2089   a->cendbs      = oldmat->cendbs;
2090 
2091   /* hash table stuff */
2092   a->ht           = 0;
2093   a->hd           = 0;
2094   a->ht_size      = 0;
2095   a->ht_flag      = oldmat->ht_flag;
2096   a->ht_fact      = oldmat->ht_fact;
2097   a->ht_total_ct  = 0;
2098   a->ht_insert_ct = 0;
2099 
2100   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2101   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2102   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2103   if (oldmat->colmap) {
2104 #if defined (PETSC_USE_CTABLE)
2105     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2106 #else
2107     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2108     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2109     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2110 #endif
2111   } else a->colmap = 0;
2112 
2113   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2114     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2115     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2116     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2117   } else a->garray = 0;
2118 
2119   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2120   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2121   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2122   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2123 
2124   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2125   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2126   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2127   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2128 
2129   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2130   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2131   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2132   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2133   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2134   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2135   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2136   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2137   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2139   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2140   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2141   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2142 
2143   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2144   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2145   a->sMvctx = oldmat->sMvctx;
2146   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2147 
2148   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2149   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2150   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2151   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2152   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2153   *newmat = mat;
2154   PetscFunctionReturn(0);
2155 }
2156 
2157 #include "petscsys.h"
2158 
2159 #undef __FUNCT__
2160 #define __FUNCT__ "MatLoad_MPISBAIJ"
2161 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2162 {
2163   Mat            A;
2164   PetscErrorCode ierr;
2165   PetscInt       i,nz,j,rstart,rend;
2166   PetscScalar    *vals,*buf;
2167   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2168   MPI_Status     status;
2169   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2170   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2171   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2172   PetscInt       bs=1,Mbs,mbs,extra_rows;
2173   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2174   PetscInt       dcount,kmax,k,nzcount,tmp;
2175   int            fd;
2176 
2177   PetscFunctionBegin;
2178   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2179 
2180   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2181   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2182   if (!rank) {
2183     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2184     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2185     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2186     if (header[3] < 0) {
2187       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2188     }
2189   }
2190 
2191   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2192   M = header[1]; N = header[2];
2193 
2194   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2195 
2196   /*
2197      This code adds extra rows to make sure the number of rows is
2198      divisible by the blocksize
2199   */
2200   Mbs        = M/bs;
2201   extra_rows = bs - M + bs*(Mbs);
2202   if (extra_rows == bs) extra_rows = 0;
2203   else                  Mbs++;
2204   if (extra_rows && !rank) {
2205     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2206   }
2207 
2208   /* determine ownership of all rows */
2209   mbs        = Mbs/size + ((Mbs % size) > rank);
2210   m          = mbs*bs;
2211   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2212   browners   = rowners + size + 1;
2213   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2214   rowners[0] = 0;
2215   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2216   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2217   rstart = rowners[rank];
2218   rend   = rowners[rank+1];
2219 
2220   /* distribute row lengths to all processors */
2221   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2222   if (!rank) {
2223     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2224     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2225     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2226     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2227     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2228     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2229     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2230   } else {
2231     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2232   }
2233 
2234   if (!rank) {   /* procs[0] */
2235     /* calculate the number of nonzeros on each processor */
2236     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2237     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2238     for (i=0; i<size; i++) {
2239       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2240         procsnz[i] += rowlengths[j];
2241       }
2242     }
2243     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2244 
2245     /* determine max buffer needed and allocate it */
2246     maxnz = 0;
2247     for (i=0; i<size; i++) {
2248       maxnz = PetscMax(maxnz,procsnz[i]);
2249     }
2250     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2251 
2252     /* read in my part of the matrix column indices  */
2253     nz     = procsnz[0];
2254     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2255     mycols = ibuf;
2256     if (size == 1)  nz -= extra_rows;
2257     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2258     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2259 
2260     /* read in everyone else's (except the last proc's) and ship off */
2261     for (i=1; i<size-1; i++) {
2262       nz   = procsnz[i];
2263       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2264       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2265     }
2266     /* read in the stuff for the last proc */
2267     if (size != 1) {
2268       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2269       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2270       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2271       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2272     }
2273     ierr = PetscFree(cols);CHKERRQ(ierr);
2274   } else {  /* procs[i], i>0 */
2275     /* determine buffer space needed for message */
2276     nz = 0;
2277     for (i=0; i<m; i++) {
2278       nz += locrowlens[i];
2279     }
2280     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2281     mycols = ibuf;
2282     /* receive message of column indices*/
2283     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2284     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2285     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2286   }
2287 
2288   /* loop over local rows, determining number of off diagonal entries */
2289   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2290   odlens   = dlens + (rend-rstart);
2291   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2292   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2293   masked1  = mask    + Mbs;
2294   masked2  = masked1 + Mbs;
2295   rowcount = 0; nzcount = 0;
2296   for (i=0; i<mbs; i++) {
2297     dcount  = 0;
2298     odcount = 0;
2299     for (j=0; j<bs; j++) {
2300       kmax = locrowlens[rowcount];
2301       for (k=0; k<kmax; k++) {
2302         tmp = mycols[nzcount++]/bs; /* block col. index */
2303         if (!mask[tmp]) {
2304           mask[tmp] = 1;
2305           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2306           else masked1[dcount++] = tmp; /* entry in diag portion */
2307         }
2308       }
2309       rowcount++;
2310     }
2311 
2312     dlens[i]  = dcount;  /* d_nzz[i] */
2313     odlens[i] = odcount; /* o_nzz[i] */
2314 
2315     /* zero out the mask elements we set */
2316     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2317     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2318   }
2319 
2320   /* create our matrix */
2321   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2322   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2323   ierr = MatSetType(A,type);CHKERRQ(ierr);
2324   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2325   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2326 
2327   if (!rank) {
2328     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2329     /* read in my part of the matrix numerical values  */
2330     nz = procsnz[0];
2331     vals = buf;
2332     mycols = ibuf;
2333     if (size == 1)  nz -= extra_rows;
2334     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2335     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2336 
2337     /* insert into matrix */
2338     jj      = rstart*bs;
2339     for (i=0; i<m; i++) {
2340       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2341       mycols += locrowlens[i];
2342       vals   += locrowlens[i];
2343       jj++;
2344     }
2345 
2346     /* read in other processors (except the last one) and ship out */
2347     for (i=1; i<size-1; i++) {
2348       nz   = procsnz[i];
2349       vals = buf;
2350       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2351       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2352     }
2353     /* the last proc */
2354     if (size != 1){
2355       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2356       vals = buf;
2357       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2358       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2359       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2360     }
2361     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2362 
2363   } else {
2364     /* receive numeric values */
2365     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2366 
2367     /* receive message of values*/
2368     vals   = buf;
2369     mycols = ibuf;
2370     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2371     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2372     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with file");
2373 
2374     /* insert into matrix */
2375     jj      = rstart*bs;
2376     for (i=0; i<m; i++) {
2377       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2378       mycols += locrowlens[i];
2379       vals   += locrowlens[i];
2380       jj++;
2381     }
2382   }
2383 
2384   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2385   ierr = PetscFree(buf);CHKERRQ(ierr);
2386   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2387   ierr = PetscFree(rowners);CHKERRQ(ierr);
2388   ierr = PetscFree(dlens);CHKERRQ(ierr);
2389   ierr = PetscFree(mask);CHKERRQ(ierr);
2390   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2391   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   *newmat = A;
2393   PetscFunctionReturn(0);
2394 }
2395 
2396 #undef __FUNCT__
2397 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2398 /*XXXXX@
2399    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2400 
2401    Input Parameters:
2402 .  mat  - the matrix
2403 .  fact - factor
2404 
2405    Collective on Mat
2406 
2407    Level: advanced
2408 
2409   Notes:
2410    This can also be set by the command line option: -mat_use_hash_table fact
2411 
2412 .keywords: matrix, hashtable, factor, HT
2413 
2414 .seealso: MatSetOption()
2415 @XXXXX*/
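
/*
   Illustrative sketch only: the hash-table assembly path is normally enabled
   from the options database before the matrix type is set, e.g.

      ierr = PetscOptionsSetValue("-mat_use_hash_table","1.39");CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);

   or equivalently by passing -mat_use_hash_table 1.39 on the command line;
   MatCreate_MPISBAIJ() above then forwards the factor to
   MatMPIBAIJSetHashTableFactor().
*/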
2416 
2417 
2418 #undef __FUNCT__
2419 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2420 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2421 {
2422   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2423   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2424   PetscReal      atmp;
2425   PetscReal      *work,*svalues,*rvalues;
2426   PetscErrorCode ierr;
2427   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2428   PetscMPIInt    rank,size;
2429   PetscInt       *rowners_bs,dest,count,source;
2430   PetscScalar    *va;
2431   MatScalar      *ba;
2432   MPI_Status     stat;
2433 
2434   PetscFunctionBegin;
2435   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2436   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2437 
2438   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2439   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2440 
2441   bs   = A->rmap.bs;
2442   mbs  = a->mbs;
2443   Mbs  = a->Mbs;
2444   ba   = b->a;
2445   bi   = b->i;
2446   bj   = b->j;
2447 
2448   /* find ownerships */
2449   rowners_bs = A->rmap.range;
2450 
2451   /* each proc creates an array to be distributed */
2452   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2453   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2454 
2455   /* row_max for B */
2456   if (rank != size-1){
2457     for (i=0; i<mbs; i++) {
2458       ncols = bi[1] - bi[0]; bi++;
2459       brow  = bs*i;
2460       for (j=0; j<ncols; j++){
2461         bcol = bs*(*bj);
2462         for (kcol=0; kcol<bs; kcol++){
2463           col = bcol + kcol;                 /* local col index */
2464           col += rowners_bs[rank+1];      /* global col index */
2465           for (krow=0; krow<bs; krow++){
2466             atmp = PetscAbsScalar(*ba); ba++;
2467             row = brow + krow;    /* local row index */
2468             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2469             if (work[col] < atmp) work[col] = atmp;
2470           }
2471         }
2472         bj++;
2473       }
2474     }
2475 
2476     /* send values to its owners */
2477     for (dest=rank+1; dest<size; dest++){
2478       svalues = work + rowners_bs[dest];
2479       count   = rowners_bs[dest+1]-rowners_bs[dest];
2480       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2481     }
2482   }
2483 
2484   /* receive values */
2485   if (rank){
2486     rvalues = work;
2487     count   = rowners_bs[rank+1]-rowners_bs[rank];
2488     for (source=0; source<rank; source++){
2489       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2490       /* process values */
2491       for (i=0; i<count; i++){
2492         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2493       }
2494     }
2495   }
2496 
2497   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2498   ierr = PetscFree(work);CHKERRQ(ierr);
2499   PetscFunctionReturn(0);
2500 }
2501 
2502 #undef __FUNCT__
2503 #define __FUNCT__ "MatRelax_MPISBAIJ"
2504 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2505 {
2506   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2507   PetscErrorCode ierr;
2508   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2509   PetscScalar    *x,*b,*ptr,zero=0.0;
2510   Vec            bb1;
2511 
2512   PetscFunctionBegin;
2513   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2514   if (bs > 1)
2515     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2516 
2517   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2518     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2519       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2520       its--;
2521     }
2522 
2523     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2524     while (its--){
2525 
2526       /* lower triangular part: slvec0b = - B^T*xx */
2527       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2528 
2529       /* copy xx into slvec0a */
2530       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2531       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2532       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2533       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2534 
2535       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2536 
2537       /* copy bb into slvec1a */
2538       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2539       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2540       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
2541       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2542 
2543       /* set slvec1b = 0 */
2544       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2545 
2546       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2547       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2548       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2549       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2550 
2551       /* upper triangular part: bb1 = bb1 - B*x */
2552       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2553 
2554       /* local diagonal sweep */
2555       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2556     }
2557     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2558   } else {
2559     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2560   }
2561   PetscFunctionReturn(0);
2562 }
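
/*
   Usage sketch for the relaxation routine above (illustrative only, assuming
   the public MatRelax() wrapper of this release; bs must be 1 and only
   SOR_LOCAL_SYMMETRIC_SWEEP is supported):

      ierr = MatRelax(A,b,1.0,
                      (MatSORType)(SOR_LOCAL_SYMMETRIC_SWEEP | SOR_ZERO_INITIAL_GUESS),
                      0.0,1,1,x);CHKERRQ(ierr);
*/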
2563 
2564 #undef __FUNCT__
2565 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2566 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2567 {
2568   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2569   PetscErrorCode ierr;
2570   Vec            lvec1,bb1;
2571 
2572   PetscFunctionBegin;
2573   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2574   if (matin->rmap.bs > 1)
2575     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2576 
2577   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2578     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2579       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2580       its--;
2581     }
2582 
2583     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2584     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2585     while (its--){
2586       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2587 
2588       /* lower diagonal part: bb1 = bb - B^T*xx */
2589       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2590       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2591 
2592       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2593       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2594       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2595 
2596       /* upper diagonal part: bb1 = bb1 - B*x */
2597       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2598       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2599 
2600       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2601 
2602       /* diagonal sweep */
2603       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2604     }
2605     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2606     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2607   } else {
2608     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2609   }
2610   PetscFunctionReturn(0);
2611 }
2612 
2613