xref: /petsc/src/mat/impls/sbaij/mpi/mpisbaij.c (revision e9f7bc9ebdb6b7145bf88dbdd2c0c400f06b4c30)
1 #define PETSCMAT_DLL
2 
3 #include "src/mat/impls/baij/mpi/mpibaij.h"    /*I "petscmat.h" I*/
4 #include "mpisbaij.h"
5 #include "src/mat/impls/sbaij/seq/sbaij.h"
6 
7 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ(Mat);
8 EXTERN PetscErrorCode MatSetUpMultiply_MPISBAIJ_2comm(Mat);
9 EXTERN PetscErrorCode DisAssemble_MPISBAIJ(Mat);
10 EXTERN PetscErrorCode MatIncreaseOverlap_MPISBAIJ(Mat,PetscInt,IS[],PetscInt);
11 EXTERN PetscErrorCode MatGetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
12 EXTERN PetscErrorCode MatGetValues_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],PetscScalar []);
13 EXTERN PetscErrorCode MatSetValues_SeqSBAIJ(Mat,PetscInt,const PetscInt [],PetscInt,const PetscInt [],const PetscScalar [],InsertMode);
14 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
15 EXTERN PetscErrorCode MatSetValuesBlocked_SeqBAIJ(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const PetscScalar[],InsertMode);
16 EXTERN PetscErrorCode MatGetRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
17 EXTERN PetscErrorCode MatRestoreRow_SeqSBAIJ(Mat,PetscInt,PetscInt*,PetscInt**,PetscScalar**);
18 EXTERN PetscErrorCode MatPrintHelp_SeqSBAIJ(Mat);
19 EXTERN PetscErrorCode MatZeroRows_SeqSBAIJ(Mat,IS,PetscScalar*);
20 EXTERN PetscErrorCode MatZeroRows_SeqBAIJ(Mat,IS,PetscScalar *);
21 EXTERN PetscErrorCode MatGetRowMax_MPISBAIJ(Mat,Vec);
22 EXTERN PetscErrorCode MatRelax_MPISBAIJ(Mat,Vec,PetscReal,MatSORType,PetscReal,PetscInt,PetscInt,Vec);
23 
24 /*  UGLY, ugly, ugly
25    When MatScalar == PetscScalar the function MatSetValuesBlocked_MPISBAIJ_MatScalar() does
26    not exist; the macros below map the ..._MatScalar() names onto the regular functions.
27    Otherwise ..._MatScalar() takes matrix elements already in single precision and inserts
28    them into the single precision data structure, while MatSetValuesBlocked_MPISBAIJ()
29    converts the entries into single precision and then calls ..._MatScalar().
30 */
31 #if defined(PETSC_USE_MAT_SINGLE)
32 EXTERN PetscErrorCode MatSetValuesBlocked_SeqSBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
33 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
34 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
35 EXTERN PetscErrorCode MatSetValues_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
36 EXTERN PetscErrorCode MatSetValuesBlocked_MPISBAIJ_HT_MatScalar(Mat,PetscInt,const PetscInt[],PetscInt,const PetscInt[],const MatScalar[],InsertMode);
37 #else
38 #define MatSetValuesBlocked_SeqSBAIJ_MatScalar      MatSetValuesBlocked_SeqSBAIJ
39 #define MatSetValues_MPISBAIJ_MatScalar             MatSetValues_MPISBAIJ
40 #define MatSetValuesBlocked_MPISBAIJ_MatScalar      MatSetValuesBlocked_MPISBAIJ
41 #define MatSetValues_MPISBAIJ_HT_MatScalar          MatSetValues_MPISBAIJ_HT
42 #define MatSetValuesBlocked_MPISBAIJ_HT_MatScalar   MatSetValuesBlocked_MPISBAIJ_HT
43 #endif
44 
45 EXTERN_C_BEGIN
46 #undef __FUNCT__
47 #define __FUNCT__ "MatStoreValues_MPISBAIJ"
48 PetscErrorCode PETSCMAT_DLLEXPORT MatStoreValues_MPISBAIJ(Mat mat)
49 {
50   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
51   PetscErrorCode ierr;
52 
53   PetscFunctionBegin;
54   ierr = MatStoreValues(aij->A);CHKERRQ(ierr);
55   ierr = MatStoreValues(aij->B);CHKERRQ(ierr);
56   PetscFunctionReturn(0);
57 }
58 EXTERN_C_END
59 
60 EXTERN_C_BEGIN
61 #undef __FUNCT__
62 #define __FUNCT__ "MatRetrieveValues_MPISBAIJ"
63 PetscErrorCode PETSCMAT_DLLEXPORT MatRetrieveValues_MPISBAIJ(Mat mat)
64 {
65   Mat_MPISBAIJ   *aij = (Mat_MPISBAIJ *)mat->data;
66   PetscErrorCode ierr;
67 
68   PetscFunctionBegin;
69   ierr = MatRetrieveValues(aij->A);CHKERRQ(ierr);
70   ierr = MatRetrieveValues(aij->B);CHKERRQ(ierr);
71   PetscFunctionReturn(0);
72 }
73 EXTERN_C_END
74 
75 
76 #define CHUNKSIZE  10
77 
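/*
   Summary of the two private macros below:
   MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) inserts or adds one scalar into the
   local "diagonal" block A: it locates the block row (row/bs), narrows the candidate block
   columns with a short binary search followed by a linear scan, and either updates the entry
   at offset bs*cidx+ridx of an existing bs x bs block or, if allowed by a->nonew, allocates a
   new block via MatSeqXAIJReallocateAIJ and shifts the later blocks of the row up by one.
   All working variables (rp, ap, nrow, low, high, ...) must be declared by the enclosing
   function.  MatSetValues_SeqSBAIJ_B_Private() does the same for the off-diagonal block B.
*/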
78 #define  MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv) \
79 { \
80  \
81     brow = row/bs;  \
82     rp   = aj + ai[brow]; ap = aa + bs2*ai[brow]; \
83     rmax = aimax[brow]; nrow = ailen[brow]; \
84       bcol = col/bs; \
85       ridx = row % bs; cidx = col % bs; \
86       low = 0; high = nrow; \
87       while (high-low > 3) { \
88         t = (low+high)/2; \
89         if (rp[t] > bcol) high = t; \
90         else              low  = t; \
91       } \
92       for (_i=low; _i<high; _i++) { \
93         if (rp[_i] > bcol) break; \
94         if (rp[_i] == bcol) { \
95           bap  = ap +  bs2*_i + bs*cidx + ridx; \
96           if (addv == ADD_VALUES) *bap += value;  \
97           else                    *bap  = value;  \
98           goto a_noinsert; \
99         } \
100       } \
101       if (a->nonew == 1) goto a_noinsert; \
102       if (a->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
103       MatSeqXAIJReallocateAIJ(A,a->mbs,bs2,nrow,brow,bcol,rmax,aa,ai,aj,rp,ap,aimax,a->nonew); \
104       N = nrow++ - 1;  \
105       /* shift up all the later entries in this row */ \
106       for (ii=N; ii>=_i; ii--) { \
107         rp[ii+1] = rp[ii]; \
108         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
109       } \
110       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr); }  \
111       rp[_i]                      = bcol;  \
112       ap[bs2*_i + bs*cidx + ridx] = value;  \
113       a_noinsert:; \
114     ailen[brow] = nrow; \
115 }
116 #ifndef MatSetValues_SeqSBAIJ_B_Private
117 #define  MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv) \
118 { \
119     brow = row/bs;  \
120     rp   = bj + bi[brow]; ap = ba + bs2*bi[brow]; \
121     rmax = bimax[brow]; nrow = bilen[brow]; \
122       bcol = col/bs; \
123       ridx = row % bs; cidx = col % bs; \
124       low = 0; high = nrow; \
125       while (high-low > 3) { \
126         t = (low+high)/2; \
127         if (rp[t] > bcol) high = t; \
128         else              low  = t; \
129       } \
130       for (_i=low; _i<high; _i++) { \
131         if (rp[_i] > bcol) break; \
132         if (rp[_i] == bcol) { \
133           bap  = ap +  bs2*_i + bs*cidx + ridx; \
134           if (addv == ADD_VALUES) *bap += value;  \
135           else                    *bap  = value;  \
136           goto b_noinsert; \
137         } \
138       } \
139       if (b->nonew == 1) goto b_noinsert; \
140       if (b->nonew == -1) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Inserting a new nonzero (%D, %D) into matrix", row, col); \
141       MatSeqXAIJReallocateAIJ(B,b->mbs,bs2,nrow,brow,bcol,rmax,ba,bi,bj,rp,ap,bimax,b->nonew); \
142       N = nrow++ - 1;  \
143       /* shift up all the later entries in this row */ \
144       for (ii=N; ii>=_i; ii--) { \
145         rp[ii+1] = rp[ii]; \
146         ierr = PetscMemcpy(ap+bs2*(ii+1),ap+bs2*(ii),bs2*sizeof(MatScalar));CHKERRQ(ierr); \
147       } \
148       if (N>=_i) { ierr = PetscMemzero(ap+bs2*_i,bs2*sizeof(MatScalar));CHKERRQ(ierr);}  \
149       rp[_i]                      = bcol;  \
150       ap[bs2*_i + bs*cidx + ridx] = value;  \
151       b_noinsert:; \
152     bilen[brow] = nrow; \
153 }
154 #endif
155 
156 #if defined(PETSC_USE_MAT_SINGLE)
157 #undef __FUNCT__
158 #define __FUNCT__ "MatSetValues_MPISBAIJ"
159 PetscErrorCode MatSetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
160 {
161   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
162   PetscErrorCode ierr;
163   PetscInt       i,N = m*n;
164   MatScalar      *vsingle;
165 
166   PetscFunctionBegin;
167   if (N > b->setvalueslen) {
168     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
169     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
170     b->setvalueslen  = N;
171   }
172   vsingle = b->setvaluescopy;
173 
174   for (i=0; i<N; i++) {
175     vsingle[i] = v[i];
176   }
177   ierr = MatSetValues_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
178   PetscFunctionReturn(0);
179 }
180 
181 #undef __FUNCT__
182 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ"
183 PetscErrorCode MatSetValuesBlocked_MPISBAIJ(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const PetscScalar v[],InsertMode addv)
184 {
185   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ*)mat->data;
186   PetscErrorCode ierr;
187   PetscInt       i,N = m*n*b->bs2;
188   MatScalar      *vsingle;
189 
190   PetscFunctionBegin;
191   if (N > b->setvalueslen) {
192     ierr = PetscFree(b->setvaluescopy);CHKERRQ(ierr);
193     ierr = PetscMalloc(N*sizeof(MatScalar),&b->setvaluescopy);CHKERRQ(ierr);
194     b->setvalueslen  = N;
195   }
196   vsingle = b->setvaluescopy;
197   for (i=0; i<N; i++) {
198     vsingle[i] = v[i];
199   }
200   ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,m,im,n,in,vsingle,addv);CHKERRQ(ierr);
201   PetscFunctionReturn(0);
202 }
203 #endif
204 
205 /* Only add/insert a(i,j) with i<=j (blocks).
206    Any a(i,j) with i>j input by the user is ignored.
207 */
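/*
   Illustrative caller-side sketch (not part of this file; rstart, rend, N and value are
   assumed to come from the caller, e.g. via MatGetOwnershipRange() and MatGetSize()):
   fill only the upper triangle, or ask the matrix to silently drop lower-triangular input.

     ierr = MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR);CHKERRQ(ierr);
     for (i=rstart; i<rend; i++) {
       for (j=i; j<N; j++) {                    (only j >= i, i.e. the upper triangle)
         ierr = MatSetValues(mat,1,&i,1,&j,&value,INSERT_VALUES);CHKERRQ(ierr);
       }
     }
     ierr = MatAssemblyBegin(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
     ierr = MatAssemblyEnd(mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
*/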
208 #undef __FUNCT__
209 #define __FUNCT__ "MatSetValues_MPISBAIJ_MatScalar"
210 PetscErrorCode MatSetValues_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
211 {
212   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
213   MatScalar      value;
214   PetscTruth     roworiented = baij->roworiented;
215   PetscErrorCode ierr;
216   PetscInt       i,j,row,col;
217   PetscInt       rstart_orig=mat->rmap.rstart;
218   PetscInt       rend_orig=mat->rmap.rend,cstart_orig=mat->cmap.rstart;
219   PetscInt       cend_orig=mat->cmap.rend,bs=mat->rmap.bs;
220 
221   /* Some Variables required in the macro */
222   Mat            A = baij->A;
223   Mat_SeqSBAIJ   *a = (Mat_SeqSBAIJ*)(A)->data;
224   PetscInt       *aimax=a->imax,*ai=a->i,*ailen=a->ilen,*aj=a->j;
225   MatScalar      *aa=a->a;
226 
227   Mat            B = baij->B;
228   Mat_SeqBAIJ   *b = (Mat_SeqBAIJ*)(B)->data;
229   PetscInt      *bimax=b->imax,*bi=b->i,*bilen=b->ilen,*bj=b->j;
230   MatScalar     *ba=b->a;
231 
232   PetscInt      *rp,ii,nrow,_i,rmax,N,brow,bcol;
233   PetscInt      low,high,t,ridx,cidx,bs2=a->bs2;
234   MatScalar     *ap,*bap;
235 
236   /* for stash */
237   PetscInt      n_loc, *in_loc = PETSC_NULL;
238   MatScalar     *v_loc = PETSC_NULL;
239 
240   PetscFunctionBegin;
241 
242   if (!baij->donotstash){
243     if (n > baij->n_loc) {
244       ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
245       ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
246       ierr = PetscMalloc(n*sizeof(PetscInt),&baij->in_loc);CHKERRQ(ierr);
247       ierr = PetscMalloc(n*sizeof(MatScalar),&baij->v_loc);CHKERRQ(ierr);
248       baij->n_loc = n;
249     }
250     in_loc = baij->in_loc;
251     v_loc  = baij->v_loc;
252   }
253 
254   for (i=0; i<m; i++) {
255     if (im[i] < 0) continue;
256 #if defined(PETSC_USE_DEBUG)
257     if (im[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",im[i],mat->rmap.N-1);
258 #endif
259     if (im[i] >= rstart_orig && im[i] < rend_orig) { /* this processor entry */
260       row = im[i] - rstart_orig;              /* local row index */
261       for (j=0; j<n; j++) {
262         if (im[i]/bs > in[j]/bs){
263           if (a->ignore_ltriangular){
264             continue;    /* ignore lower triangular blocks */
265           } else {
266             SETERRQ(PETSC_ERR_USER,"Lower triangular value cannot be set for sbaij format. To ignore these values, run with -mat_ignore_lower_triangular or call MatSetOption(mat,MAT_IGNORE_LOWER_TRIANGULAR)");
267           }
268         }
269         if (in[j] >= cstart_orig && in[j] < cend_orig){  /* diag entry (A) */
270           col = in[j] - cstart_orig;          /* local col index */
271           brow = row/bs; bcol = col/bs;
272           if (brow > bcol) continue;  /* ignore lower triangular blocks of A */
273           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
274           MatSetValues_SeqSBAIJ_A_Private(row,col,value,addv);
275           /* ierr = MatSetValues_SeqBAIJ(baij->A,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
276         } else if (in[j] < 0) continue;
277 #if defined(PETSC_USE_DEBUG)
278         else if (in[j] >= mat->cmap.N) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",in[j],mat->cmap.N-1);}
279 #endif
280         else {  /* off-diag entry (B) */
281           if (mat->was_assembled) {
282             if (!baij->colmap) {
283               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
284             }
285 #if defined (PETSC_USE_CTABLE)
286             ierr = PetscTableFind(baij->colmap,in[j]/bs + 1,&col);CHKERRQ(ierr);
287             col  = col - 1;
288 #else
289             col = baij->colmap[in[j]/bs] - 1;
290 #endif
291             if (col < 0 && !((Mat_SeqSBAIJ*)(baij->A->data))->nonew) {
292               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
293               col =  in[j];
294               /* Reinitialize the variables required by MatSetValues_SeqBAIJ_B_Private() */
295               B = baij->B;
296               b = (Mat_SeqBAIJ*)(B)->data;
297               bimax=b->imax;bi=b->i;bilen=b->ilen;bj=b->j;
298               ba=b->a;
299             } else col += in[j]%bs;
300           } else col = in[j];
301           if (roworiented) value = v[i*n+j]; else value = v[i+j*m];
302           MatSetValues_SeqSBAIJ_B_Private(row,col,value,addv);
303           /* ierr = MatSetValues_SeqBAIJ(baij->B,1,&row,1,&col,&value,addv);CHKERRQ(ierr); */
304         }
305       }
306     } else {  /* off processor entry */
307       if (!baij->donotstash) {
308         n_loc = 0;
309         for (j=0; j<n; j++){
310           if (im[i]/bs > in[j]/bs) continue; /* ignore lower triangular blocks */
311           in_loc[n_loc] = in[j];
312           if (roworiented) {
313             v_loc[n_loc] = v[i*n+j];
314           } else {
315             v_loc[n_loc] = v[j*m+i];
316           }
317           n_loc++;
318         }
319         ierr = MatStashValuesRow_Private(&mat->stash,im[i],n_loc,in_loc,v_loc);CHKERRQ(ierr);
320       }
321     }
322   }
323   PetscFunctionReturn(0);
324 }
325 
326 #undef __FUNCT__
327 #define __FUNCT__ "MatSetValuesBlocked_MPISBAIJ_MatScalar"
328 PetscErrorCode MatSetValuesBlocked_MPISBAIJ_MatScalar(Mat mat,PetscInt m,const PetscInt im[],PetscInt n,const PetscInt in[],const MatScalar v[],InsertMode addv)
329 {
330   Mat_MPISBAIJ    *baij = (Mat_MPISBAIJ*)mat->data;
331   const MatScalar *value;
332   MatScalar       *barray=baij->barray;
333   PetscTruth      roworiented = baij->roworiented;
334   PetscErrorCode  ierr;
335   PetscInt        i,j,ii,jj,row,col,rstart=baij->rstartbs;
336   PetscInt        rend=baij->rendbs,cstart=baij->rstartbs,stepval;
337   PetscInt        cend=baij->rendbs,bs=mat->rmap.bs,bs2=baij->bs2;
338 
339   PetscFunctionBegin;
340   if(!barray) {
341     ierr         = PetscMalloc(bs2*sizeof(MatScalar),&barray);CHKERRQ(ierr);
342     baij->barray = barray;
343   }
344 
345   if (roworiented) {
346     stepval = (n-1)*bs;
347   } else {
348     stepval = (m-1)*bs;
349   }
350   for (i=0; i<m; i++) {
351     if (im[i] < 0) continue;
352 #if defined(PETSC_USE_DEBUG)
353     if (im[i] >= baij->Mbs) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large, row %D max %D",im[i],baij->Mbs-1);
354 #endif
355     if (im[i] >= rstart && im[i] < rend) {
356       row = im[i] - rstart;
357       for (j=0; j<n; j++) {
358         /* If NumCol = 1 then a copy is not required */
359         if ((roworiented) && (n == 1)) {
360           barray = (MatScalar*) v + i*bs2;
361         } else if((!roworiented) && (m == 1)) {
362           barray = (MatScalar*) v + j*bs2;
363         } else { /* Here a copy is required */
364           if (roworiented) {
365             value = v + i*(stepval+bs)*bs + j*bs;
366           } else {
367             value = v + j*(stepval+bs)*bs + i*bs;
368           }
369           for (ii=0; ii<bs; ii++,value+=stepval) {
370             for (jj=0; jj<bs; jj++) {
371               *barray++  = *value++;
372             }
373           }
374           barray -=bs2;
375         }
376 
377         if (in[j] >= cstart && in[j] < cend){
378           col  = in[j] - cstart;
379           ierr = MatSetValuesBlocked_SeqSBAIJ(baij->A,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
380         }
381         else if (in[j] < 0) continue;
382 #if defined(PETSC_USE_DEBUG)
383         else if (in[j] >= baij->Nbs) {SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large, col %D max %D",in[j],baij->Nbs-1);}
384 #endif
385         else {
386           if (mat->was_assembled) {
387             if (!baij->colmap) {
388               ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
389             }
390 
391 #if defined(PETSC_USE_DEBUG)
392 #if defined (PETSC_USE_CTABLE)
393             { PetscInt data;
394               ierr = PetscTableFind(baij->colmap,in[j]+1,&data);CHKERRQ(ierr);
395               if ((data - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
396             }
397 #else
398             if ((baij->colmap[in[j]] - 1) % bs) SETERRQ(PETSC_ERR_PLIB,"Incorrect colmap");
399 #endif
400 #endif
401 #if defined (PETSC_USE_CTABLE)
402 	    ierr = PetscTableFind(baij->colmap,in[j]+1,&col);CHKERRQ(ierr);
403             col  = (col - 1)/bs;
404 #else
405             col = (baij->colmap[in[j]] - 1)/bs;
406 #endif
407             if (col < 0 && !((Mat_SeqBAIJ*)(baij->A->data))->nonew) {
408               ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
409               col =  in[j];
410             }
411           }
412           else col = in[j];
413           ierr = MatSetValuesBlocked_SeqBAIJ(baij->B,1,&row,1,&col,barray,addv);CHKERRQ(ierr);
414         }
415       }
416     } else {
417       if (!baij->donotstash) {
418         if (roworiented) {
419           ierr = MatStashValuesRowBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
420         } else {
421           ierr = MatStashValuesColBlocked_Private(&mat->bstash,im[i],n,in,v,m,n,i);CHKERRQ(ierr);
422         }
423       }
424     }
425   }
426   PetscFunctionReturn(0);
427 }
428 
429 #undef __FUNCT__
430 #define __FUNCT__ "MatGetValues_MPISBAIJ"
431 PetscErrorCode MatGetValues_MPISBAIJ(Mat mat,PetscInt m,const PetscInt idxm[],PetscInt n,const PetscInt idxn[],PetscScalar v[])
432 {
433   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
434   PetscErrorCode ierr;
435   PetscInt       bs=mat->rmap.bs,i,j,bsrstart = mat->rmap.rstart,bsrend = mat->rmap.rend;
436   PetscInt       bscstart = mat->cmap.rstart,bscend = mat->cmap.rend,row,col,data;
437 
438   PetscFunctionBegin;
439   for (i=0; i<m; i++) {
440     if (idxm[i] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative row: %D",idxm[i]);
441     if (idxm[i] >= mat->rmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Row too large: row %D max %D",idxm[i],mat->rmap.N-1);
442     if (idxm[i] >= bsrstart && idxm[i] < bsrend) {
443       row = idxm[i] - bsrstart;
444       for (j=0; j<n; j++) {
445         if (idxn[j] < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"Negative column %D",idxn[j]);
446         if (idxn[j] >= mat->cmap.N) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"Column too large: col %D max %D",idxn[j],mat->cmap.N-1);
447         if (idxn[j] >= bscstart && idxn[j] < bscend){
448           col = idxn[j] - bscstart;
449           ierr = MatGetValues_SeqSBAIJ(baij->A,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
450         } else {
451           if (!baij->colmap) {
452             ierr = CreateColmap_MPIBAIJ_Private(mat);CHKERRQ(ierr);
453           }
454 #if defined (PETSC_USE_CTABLE)
455           ierr = PetscTableFind(baij->colmap,idxn[j]/bs+1,&data);CHKERRQ(ierr);
456           data --;
457 #else
458           data = baij->colmap[idxn[j]/bs]-1;
459 #endif
460           if((data < 0) || (baij->garray[data/bs] != idxn[j]/bs)) *(v+i*n+j) = 0.0;
461           else {
462             col  = data + idxn[j]%bs;
463             ierr = MatGetValues_SeqBAIJ(baij->B,1,&row,1,&col,v+i*n+j);CHKERRQ(ierr);
464           }
465         }
466       }
467     } else {
468       SETERRQ(PETSC_ERR_SUP,"Only local values currently supported");
469     }
470   }
471  PetscFunctionReturn(0);
472 }
473 
474 #undef __FUNCT__
475 #define __FUNCT__ "MatNorm_MPISBAIJ"
476 PetscErrorCode MatNorm_MPISBAIJ(Mat mat,NormType type,PetscReal *norm)
477 {
478   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
479   PetscErrorCode ierr;
480   PetscReal      sum[2],*lnorm2;
481 
482   PetscFunctionBegin;
483   if (baij->size == 1) {
484     ierr =  MatNorm(baij->A,type,norm);CHKERRQ(ierr);
485   } else {
486     if (type == NORM_FROBENIUS) {
487       ierr = PetscMalloc(2*sizeof(PetscReal),&lnorm2);CHKERRQ(ierr);
488       ierr =  MatNorm(baij->A,type,lnorm2);CHKERRQ(ierr);
489       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2++;            /* square of norm(A) */
490       ierr =  MatNorm(baij->B,type,lnorm2);CHKERRQ(ierr);
491       *lnorm2 = (*lnorm2)*(*lnorm2); lnorm2--;             /* square of norm(B) */
492       ierr = MPI_Allreduce(lnorm2,&sum,2,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
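      /* B stores each off-diagonal block only once; by symmetry those entries occur twice in the full matrix, hence the factor 2 below */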
493       *norm = sqrt(sum[0] + 2*sum[1]);
494       ierr = PetscFree(lnorm2);CHKERRQ(ierr);
495     } else if (type == NORM_INFINITY || type == NORM_1) { /* max row/column sum */
496       Mat_SeqSBAIJ *amat=(Mat_SeqSBAIJ*)baij->A->data;
497       Mat_SeqBAIJ  *bmat=(Mat_SeqBAIJ*)baij->B->data;
498       PetscReal    *rsum,*rsum2,vabs;
499       PetscInt     *jj,*garray=baij->garray,rstart=baij->rstartbs,nz;
500       PetscInt     brow,bcol,col,bs=baij->A->rmap.bs,row,grow,gcol,mbs=amat->mbs;
501       MatScalar    *v;
502 
503       ierr  = PetscMalloc((2*mat->cmap.N+1)*sizeof(PetscReal),&rsum);CHKERRQ(ierr);
504       rsum2 = rsum + mat->cmap.N;
505       ierr  = PetscMemzero(rsum,mat->cmap.N*sizeof(PetscReal));CHKERRQ(ierr);
506       /* Amat */
507       v = amat->a; jj = amat->j;
508       for (brow=0; brow<mbs; brow++) {
509         grow = bs*(rstart + brow);
510         nz = amat->i[brow+1] - amat->i[brow];
511         for (bcol=0; bcol<nz; bcol++){
512           gcol = bs*(rstart + *jj); jj++;
513           for (col=0; col<bs; col++){
514             for (row=0; row<bs; row++){
515               vabs = PetscAbsScalar(*v); v++;
516               rsum[gcol+col] += vabs;
517               /* non-diagonal block */
518               if (bcol > 0 && vabs > 0.0) rsum[grow+row] += vabs;
519             }
520           }
521         }
522       }
523       /* Bmat */
524       v = bmat->a; jj = bmat->j;
525       for (brow=0; brow<mbs; brow++) {
526         grow = bs*(rstart + brow);
527         nz = bmat->i[brow+1] - bmat->i[brow];
528         for (bcol=0; bcol<nz; bcol++){
529           gcol = bs*garray[*jj]; jj++;
530           for (col=0; col<bs; col++){
531             for (row=0; row<bs; row++){
532               vabs = PetscAbsScalar(*v); v++;
533               rsum[gcol+col] += vabs;
534               rsum[grow+row] += vabs;
535             }
536           }
537         }
538       }
539       ierr = MPI_Allreduce(rsum,rsum2,mat->cmap.N,MPIU_REAL,MPI_SUM,mat->comm);CHKERRQ(ierr);
540       *norm = 0.0;
541       for (col=0; col<mat->cmap.N; col++) {
542         if (rsum2[col] > *norm) *norm = rsum2[col];
543       }
544       ierr = PetscFree(rsum);CHKERRQ(ierr);
545     } else {
546       SETERRQ(PETSC_ERR_SUP,"No support for this norm yet");
547     }
548   }
549   PetscFunctionReturn(0);
550 }
551 
552 #undef __FUNCT__
553 #define __FUNCT__ "MatAssemblyBegin_MPISBAIJ"
554 PetscErrorCode MatAssemblyBegin_MPISBAIJ(Mat mat,MatAssemblyType mode)
555 {
556   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
557   PetscErrorCode ierr;
558   PetscInt       nstash,reallocs;
559   InsertMode     addv;
560 
561   PetscFunctionBegin;
562   if (baij->donotstash) {
563     PetscFunctionReturn(0);
564   }
565 
566   /* make sure all processors are either in INSERT_VALUES or ADD_VALUES mode */
567   ierr = MPI_Allreduce(&mat->insertmode,&addv,1,MPI_INT,MPI_BOR,mat->comm);CHKERRQ(ierr);
568   if (addv == (ADD_VALUES|INSERT_VALUES)) {
569     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Some processors inserted others added");
570   }
571   mat->insertmode = addv; /* in case this processor had no cache */
572 
573   ierr = MatStashScatterBegin_Private(&mat->stash,mat->rmap.range);CHKERRQ(ierr);
574   ierr = MatStashScatterBegin_Private(&mat->bstash,baij->rangebs);CHKERRQ(ierr);
575   ierr = MatStashGetInfo_Private(&mat->stash,&nstash,&reallocs);CHKERRQ(ierr);
576   ierr = PetscInfo2(0,"Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
577   ierr = MatStashGetInfo_Private(&mat->bstash,&nstash,&reallocs);CHKERRQ(ierr);
578   ierr = PetscInfo2(0,"Block-Stash has %D entries, uses %D mallocs.\n",nstash,reallocs);CHKERRQ(ierr);
579   PetscFunctionReturn(0);
580 }
581 
582 #undef __FUNCT__
583 #define __FUNCT__ "MatAssemblyEnd_MPISBAIJ"
584 PetscErrorCode MatAssemblyEnd_MPISBAIJ(Mat mat,MatAssemblyType mode)
585 {
586   Mat_MPISBAIJ   *baij=(Mat_MPISBAIJ*)mat->data;
587   Mat_SeqSBAIJ   *a=(Mat_SeqSBAIJ*)baij->A->data;
588   PetscErrorCode ierr;
589   PetscInt       i,j,rstart,ncols,flg,bs2=baij->bs2;
590   PetscInt       *row,*col,other_disassembled;
591   PetscMPIInt    n;
592   PetscTruth     r1,r2,r3;
593   MatScalar      *val;
594   InsertMode     addv = mat->insertmode;
595 
596   /* do not use 'b=(Mat_SeqBAIJ*)baij->B->data' as B can be reset in disassembly */
597   PetscFunctionBegin;
598 
599   if (!baij->donotstash) {
600     while (1) {
601       ierr = MatStashScatterGetMesg_Private(&mat->stash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
602       if (!flg) break;
603 
604       for (i=0; i<n;) {
605         /* Now identify the consecutive vals belonging to the same row */
606         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
607         if (j < n) ncols = j-i;
608         else       ncols = n-i;
609         /* Now assemble all these values with a single function call */
610         ierr = MatSetValues_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i,addv);CHKERRQ(ierr);
611         i = j;
612       }
613     }
614     ierr = MatStashScatterEnd_Private(&mat->stash);CHKERRQ(ierr);
615     /* Now process the block-stash. Since the values are stashed column-oriented,
616        clear the roworiented flags before calling MatSetValues(), and afterwards
617        restore the original flags */
618     r1 = baij->roworiented;
619     r2 = a->roworiented;
620     r3 = ((Mat_SeqBAIJ*)baij->B->data)->roworiented;
621     baij->roworiented = PETSC_FALSE;
622     a->roworiented    = PETSC_FALSE;
623     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = PETSC_FALSE; /* b->roworiented */
624     while (1) {
625       ierr = MatStashScatterGetMesg_Private(&mat->bstash,&n,&row,&col,&val,&flg);CHKERRQ(ierr);
626       if (!flg) break;
627 
628       for (i=0; i<n;) {
629         /* Now identify the consecutive vals belonging to the same row */
630         for (j=i,rstart=row[j]; j<n; j++) { if (row[j] != rstart) break; }
631         if (j < n) ncols = j-i;
632         else       ncols = n-i;
633         ierr = MatSetValuesBlocked_MPISBAIJ_MatScalar(mat,1,row+i,ncols,col+i,val+i*bs2,addv);CHKERRQ(ierr);
634         i = j;
635       }
636     }
637     ierr = MatStashScatterEnd_Private(&mat->bstash);CHKERRQ(ierr);
638     baij->roworiented = r1;
639     a->roworiented    = r2;
640     ((Mat_SeqBAIJ*)baij->B->data)->roworiented    = r3; /* b->roworiented */
641   }
642 
643   ierr = MatAssemblyBegin(baij->A,mode);CHKERRQ(ierr);
644   ierr = MatAssemblyEnd(baij->A,mode);CHKERRQ(ierr);
645 
646   /* determine if any processor has disassembled; if so we must
647      also disassemble ourselves, in order that we may reassemble. */
648   /*
649      if nonzero structure of submatrix B cannot change then we know that
650      no processor disassembled thus we can skip this stuff
651   */
652   if (!((Mat_SeqBAIJ*)baij->B->data)->nonew)  {
653     ierr = MPI_Allreduce(&mat->was_assembled,&other_disassembled,1,MPI_INT,MPI_PROD,mat->comm);CHKERRQ(ierr);
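    /* other_disassembled is 0 exactly when at least one process has was_assembled == 0, i.e. it has disassembled */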
654     if (mat->was_assembled && !other_disassembled) {
655       ierr = DisAssemble_MPISBAIJ(mat);CHKERRQ(ierr);
656     }
657   }
658 
659   if (!mat->was_assembled && mode == MAT_FINAL_ASSEMBLY) {
660     ierr = MatSetUpMultiply_MPISBAIJ(mat);CHKERRQ(ierr); /* setup Mvctx and sMvctx */
661   }
662   ((Mat_SeqBAIJ*)baij->B->data)->compressedrow.use = PETSC_TRUE; /* b->compressedrow.use */
663   ierr = MatAssemblyBegin(baij->B,mode);CHKERRQ(ierr);
664   ierr = MatAssemblyEnd(baij->B,mode);CHKERRQ(ierr);
665 
666   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
667   baij->rowvalues = 0;
668 
669   PetscFunctionReturn(0);
670 }
671 
672 #undef __FUNCT__
673 #define __FUNCT__ "MatView_MPISBAIJ_ASCIIorDraworSocket"
674 static PetscErrorCode MatView_MPISBAIJ_ASCIIorDraworSocket(Mat mat,PetscViewer viewer)
675 {
676   Mat_MPISBAIJ      *baij = (Mat_MPISBAIJ*)mat->data;
677   PetscErrorCode    ierr;
678   PetscInt          bs = mat->rmap.bs;
679   PetscMPIInt       size = baij->size,rank = baij->rank;
680   PetscTruth        iascii,isdraw;
681   PetscViewer       sviewer;
682   PetscViewerFormat format;
683 
684   PetscFunctionBegin;
685   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
686   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
687   if (iascii) {
688     ierr = PetscViewerGetFormat(viewer,&format);CHKERRQ(ierr);
689     if (format == PETSC_VIEWER_ASCII_INFO_DETAIL) {
690       MatInfo info;
691       ierr = MPI_Comm_rank(mat->comm,&rank);CHKERRQ(ierr);
692       ierr = MatGetInfo(mat,MAT_LOCAL,&info);CHKERRQ(ierr);
693       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] Local rows %D nz %D nz alloced %D bs %D mem %D\n",
694               rank,mat->rmap.N,(PetscInt)info.nz_used*bs,(PetscInt)info.nz_allocated*bs,
695               mat->rmap.bs,(PetscInt)info.memory);CHKERRQ(ierr);
696       ierr = MatGetInfo(baij->A,MAT_LOCAL,&info);CHKERRQ(ierr);
697       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] on-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
698       ierr = MatGetInfo(baij->B,MAT_LOCAL,&info);CHKERRQ(ierr);
699       ierr = PetscViewerASCIISynchronizedPrintf(viewer,"[%d] off-diagonal part: nz %D \n",rank,(PetscInt)info.nz_used*bs);CHKERRQ(ierr);
700       ierr = PetscViewerFlush(viewer);CHKERRQ(ierr);
701       ierr = VecScatterView(baij->Mvctx,viewer);CHKERRQ(ierr);
702       PetscFunctionReturn(0);
703     } else if (format == PETSC_VIEWER_ASCII_INFO) {
704       ierr = PetscViewerASCIIPrintf(viewer,"  block size is %D\n",bs);CHKERRQ(ierr);
705       PetscFunctionReturn(0);
706     } else if (format == PETSC_VIEWER_ASCII_FACTOR_INFO) {
707       PetscFunctionReturn(0);
708     }
709   }
710 
711   if (isdraw) {
712     PetscDraw  draw;
713     PetscTruth isnull;
714     ierr = PetscViewerDrawGetDraw(viewer,0,&draw);CHKERRQ(ierr);
715     ierr = PetscDrawIsNull(draw,&isnull);CHKERRQ(ierr); if (isnull) PetscFunctionReturn(0);
716   }
717 
718   if (size == 1) {
719     ierr = PetscObjectSetName((PetscObject)baij->A,mat->name);CHKERRQ(ierr);
720     ierr = MatView(baij->A,viewer);CHKERRQ(ierr);
721   } else {
722     /* assemble the entire matrix onto first processor. */
723     Mat          A;
724     Mat_SeqSBAIJ *Aloc;
725     Mat_SeqBAIJ  *Bloc;
726     PetscInt     M = mat->rmap.N,N = mat->cmap.N,*ai,*aj,col,i,j,k,*rvals,mbs = baij->mbs;
727     MatScalar    *a;
728 
729     /* Should this be the same type as mat? */
730     ierr = MatCreate(mat->comm,&A);CHKERRQ(ierr);
731     if (!rank) {
732       ierr = MatSetSizes(A,M,N,M,N);CHKERRQ(ierr);
733     } else {
734       ierr = MatSetSizes(A,0,0,M,N);CHKERRQ(ierr);
735     }
736     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
737     ierr = MatMPISBAIJSetPreallocation(A,mat->rmap.bs,0,PETSC_NULL,0,PETSC_NULL);CHKERRQ(ierr);
738     ierr = PetscLogObjectParent(mat,A);CHKERRQ(ierr);
739 
740     /* copy over the A part */
741     Aloc  = (Mat_SeqSBAIJ*)baij->A->data;
742     ai    = Aloc->i; aj = Aloc->j; a = Aloc->a;
743     ierr  = PetscMalloc(bs*sizeof(PetscInt),&rvals);CHKERRQ(ierr);
744 
745     for (i=0; i<mbs; i++) {
746       rvals[0] = bs*(baij->rstartbs + i);
747       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
748       for (j=ai[i]; j<ai[i+1]; j++) {
749         col = (baij->cstartbs+aj[j])*bs;
750         for (k=0; k<bs; k++) {
751           ierr = MatSetValues_MPISBAIJ_MatScalar(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
752           col++; a += bs;
753         }
754       }
755     }
756     /* copy over the B part */
757     Bloc = (Mat_SeqBAIJ*)baij->B->data;
758     ai = Bloc->i; aj = Bloc->j; a = Bloc->a;
759     for (i=0; i<mbs; i++) {
760 
761       rvals[0] = bs*(baij->rstartbs + i);
762       for (j=1; j<bs; j++) { rvals[j] = rvals[j-1] + 1; }
763       for (j=ai[i]; j<ai[i+1]; j++) {
764         col = baij->garray[aj[j]]*bs;
765         for (k=0; k<bs; k++) {
766           ierr = MatSetValues(A,bs,rvals,1,&col,a,INSERT_VALUES);CHKERRQ(ierr);
767           col++; a += bs;
768         }
769       }
770     }
771     ierr = PetscFree(rvals);CHKERRQ(ierr);
772     ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
773     ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
774     /*
775        Every process has to participate in the call that draws the matrix, since the
776        graphics waits are synchronized across all processors that share the PetscDraw object
777     */
778     ierr = PetscViewerGetSingleton(viewer,&sviewer);CHKERRQ(ierr);
779     if (!rank) {
780       ierr = PetscObjectSetName((PetscObject)((Mat_MPISBAIJ*)(A->data))->A,mat->name);CHKERRQ(ierr);
781       ierr = MatView(((Mat_MPISBAIJ*)(A->data))->A,sviewer);CHKERRQ(ierr);
782     }
783     ierr = PetscViewerRestoreSingleton(viewer,&sviewer);CHKERRQ(ierr);
784     ierr = MatDestroy(A);CHKERRQ(ierr);
785   }
786   PetscFunctionReturn(0);
787 }
788 
789 #undef __FUNCT__
790 #define __FUNCT__ "MatView_MPISBAIJ"
791 PetscErrorCode MatView_MPISBAIJ(Mat mat,PetscViewer viewer)
792 {
793   PetscErrorCode ierr;
794   PetscTruth     iascii,isdraw,issocket,isbinary;
795 
796   PetscFunctionBegin;
797   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_ASCII,&iascii);CHKERRQ(ierr);
798   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_DRAW,&isdraw);CHKERRQ(ierr);
799   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_SOCKET,&issocket);CHKERRQ(ierr);
800   ierr = PetscTypeCompare((PetscObject)viewer,PETSC_VIEWER_BINARY,&isbinary);CHKERRQ(ierr);
801   if (iascii || isdraw || issocket || isbinary) {
802     ierr = MatView_MPISBAIJ_ASCIIorDraworSocket(mat,viewer);CHKERRQ(ierr);
803   } else {
804     SETERRQ1(PETSC_ERR_SUP,"Viewer type %s not supported by MPISBAIJ matrices",((PetscObject)viewer)->type_name);
805   }
806   PetscFunctionReturn(0);
807 }
808 
809 #undef __FUNCT__
810 #define __FUNCT__ "MatDestroy_MPISBAIJ"
811 PetscErrorCode MatDestroy_MPISBAIJ(Mat mat)
812 {
813   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
814   PetscErrorCode ierr;
815 
816   PetscFunctionBegin;
817 #if defined(PETSC_USE_LOG)
818   PetscLogObjectState((PetscObject)mat,"Rows=%D,Cols=%D",mat->rmap.N,mat->cmap.N);
819 #endif
820   ierr = MatStashDestroy_Private(&mat->stash);CHKERRQ(ierr);
821   ierr = MatStashDestroy_Private(&mat->bstash);CHKERRQ(ierr);
822   ierr = MatDestroy(baij->A);CHKERRQ(ierr);
823   ierr = MatDestroy(baij->B);CHKERRQ(ierr);
824 #if defined (PETSC_USE_CTABLE)
825   if (baij->colmap) {ierr = PetscTableDelete(baij->colmap);CHKERRQ(ierr);}
826 #else
827   ierr = PetscFree(baij->colmap);CHKERRQ(ierr);
828 #endif
829   ierr = PetscFree(baij->garray);CHKERRQ(ierr);
830   if (baij->lvec)   {ierr = VecDestroy(baij->lvec);CHKERRQ(ierr);}
831   if (baij->Mvctx)  {ierr = VecScatterDestroy(baij->Mvctx);CHKERRQ(ierr);}
832   if (baij->slvec0) {
833     ierr = VecDestroy(baij->slvec0);CHKERRQ(ierr);
834     ierr = VecDestroy(baij->slvec0b);CHKERRQ(ierr);
835   }
836   if (baij->slvec1) {
837     ierr = VecDestroy(baij->slvec1);CHKERRQ(ierr);
838     ierr = VecDestroy(baij->slvec1a);CHKERRQ(ierr);
839     ierr = VecDestroy(baij->slvec1b);CHKERRQ(ierr);
840   }
841   if (baij->sMvctx)  {ierr = VecScatterDestroy(baij->sMvctx);CHKERRQ(ierr);}
842   ierr = PetscFree(baij->rowvalues);CHKERRQ(ierr);
843   ierr = PetscFree(baij->barray);CHKERRQ(ierr);
844   ierr = PetscFree(baij->hd);CHKERRQ(ierr);
845 #if defined(PETSC_USE_MAT_SINGLE)
846   ierr = PetscFree(baij->setvaluescopy);CHKERRQ(ierr);
847 #endif
848   ierr = PetscFree(baij->in_loc);CHKERRQ(ierr);
849   ierr = PetscFree(baij->v_loc);CHKERRQ(ierr);
850   ierr = PetscFree(baij->rangebs);CHKERRQ(ierr);
851   ierr = PetscFree(baij);CHKERRQ(ierr);
852 
853   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatStoreValues_C","",PETSC_NULL);CHKERRQ(ierr);
854   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatRetrieveValues_C","",PETSC_NULL);CHKERRQ(ierr);
855   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatGetDiagonalBlock_C","",PETSC_NULL);CHKERRQ(ierr);
856   ierr = PetscObjectComposeFunction((PetscObject)mat,"MatMPISBAIJSetPreallocation_C","",PETSC_NULL);CHKERRQ(ierr);
857   PetscFunctionReturn(0);
858 }
859 
860 #undef __FUNCT__
861 #define __FUNCT__ "MatMult_MPISBAIJ"
862 PetscErrorCode MatMult_MPISBAIJ(Mat A,Vec xx,Vec yy)
863 {
864   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
865   PetscErrorCode ierr;
866   PetscInt       nt,mbs=a->mbs,bs=A->rmap.bs;
867   PetscScalar    *x,*from,zero=0.0;
868 
869   PetscFunctionBegin;
870   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
871   if (nt != A->cmap.n) {
872     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
873   }
874 
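  /*
     Informal sketch of the symmetric multiply below (the scatter sMvctx is built in
     MatSetUpMultiply_MPISBAIJ()):
       slvec1a <- A*x                 local diagonal block
       slvec0  <- [x ; B^T*x]         x is copied in below, B^T*x comes from the multtranspose
       slvec1  <- scatter(slvec0)     ADD_VALUES accumulates the remote B^T*x pieces into
                                      slvec1a and gathers the needed ghost values of x into slvec1b
       yy      <- B*slvec1b + slvec1a
  */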
875   /* diagonal part */
876   ierr = (*a->A->ops->mult)(a->A,xx,a->slvec1a);CHKERRQ(ierr);
877   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
878 
879   /* subdiagonal part */
880   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
881   CHKMEMQ;
882   /* copy x into the vec slvec0 */
883   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
884   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
885   CHKMEMQ;
886   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
887   CHKMEMQ;
888   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
889 
890   CHKMEMQ;
891   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
892   CHKMEMQ;
893   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
894   CHKMEMQ;
895   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
896   CHKMEMQ;
897   /* superdiagonal part */
898   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,yy);CHKERRQ(ierr);
899   CHKMEMQ;
900   PetscFunctionReturn(0);
901 }
902 
903 #undef __FUNCT__
904 #define __FUNCT__ "MatMult_MPISBAIJ_2comm"
905 PetscErrorCode MatMult_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy)
906 {
907   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
908   PetscErrorCode ierr;
909   PetscInt       nt;
910 
911   PetscFunctionBegin;
912   ierr = VecGetLocalSize(xx,&nt);CHKERRQ(ierr);
913   if (nt != A->cmap.n) {
914     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and xx");
915   }
916   ierr = VecGetLocalSize(yy,&nt);CHKERRQ(ierr);
917   if (nt != A->rmap.n) {
918     SETERRQ(PETSC_ERR_ARG_SIZ,"Incompatible partition of A and yy");
919   }
920 
921   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
922   /* do diagonal part */
923   ierr = (*a->A->ops->mult)(a->A,xx,yy);CHKERRQ(ierr);
924   /* do superdiagonal part */
925   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
926   ierr = (*a->B->ops->multadd)(a->B,a->lvec,yy,yy);CHKERRQ(ierr);
927   /* do subdiagonal part */
928   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
929   ierr = VecScatterBegin(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
930   ierr = VecScatterEnd(a->lvec,yy,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
931 
932   PetscFunctionReturn(0);
933 }
934 
935 #undef __FUNCT__
936 #define __FUNCT__ "MatMultAdd_MPISBAIJ"
937 PetscErrorCode MatMultAdd_MPISBAIJ(Mat A,Vec xx,Vec yy,Vec zz)
938 {
939   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
940   PetscErrorCode ierr;
941   PetscInt       mbs=a->mbs,bs=A->rmap.bs;
942   PetscScalar    *x,*from,zero=0.0;
943 
944   PetscFunctionBegin;
945   /*
946   PetscSynchronizedPrintf(A->comm," MatMultAdd is called ...\n");
947   PetscSynchronizedFlush(A->comm);
948   */
949   /* diagonal part */
950   ierr = (*a->A->ops->multadd)(a->A,xx,yy,a->slvec1a);CHKERRQ(ierr);
951   ierr = VecSet(a->slvec1b,zero);CHKERRQ(ierr);
952 
953   /* subdiagonal part */
954   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->slvec0b);CHKERRQ(ierr);
955 
956   /* copy x into the vec slvec0 */
957   ierr = VecGetArray(a->slvec0,&from);CHKERRQ(ierr);
958   ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
959   ierr = PetscMemcpy(from,x,bs*mbs*sizeof(MatScalar));CHKERRQ(ierr);
960   ierr = VecRestoreArray(a->slvec0,&from);CHKERRQ(ierr);
961 
962   ierr = VecScatterBegin(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
963   ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
964   ierr = VecScatterEnd(a->slvec0,a->slvec1,ADD_VALUES,SCATTER_FORWARD,a->sMvctx);CHKERRQ(ierr);
965 
966   /* superdiagonal part */
967   ierr = (*a->B->ops->multadd)(a->B,a->slvec1b,a->slvec1a,zz);CHKERRQ(ierr);
968 
969   PetscFunctionReturn(0);
970 }
971 
972 #undef __FUNCT__
973 #define __FUNCT__ "MatMultAdd_MPISBAIJ_2comm"
974 PetscErrorCode MatMultAdd_MPISBAIJ_2comm(Mat A,Vec xx,Vec yy,Vec zz)
975 {
976   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
977   PetscErrorCode ierr;
978 
979   PetscFunctionBegin;
980   ierr = VecScatterBegin(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
981   /* do diagonal part */
982   ierr = (*a->A->ops->multadd)(a->A,xx,yy,zz);CHKERRQ(ierr);
983   /* do superdiagonal part */
984   ierr = VecScatterEnd(xx,a->lvec,INSERT_VALUES,SCATTER_FORWARD,a->Mvctx);CHKERRQ(ierr);
985   ierr = (*a->B->ops->multadd)(a->B,a->lvec,zz,zz);CHKERRQ(ierr);
986 
987   /* do subdiagonal part */
988   ierr = (*a->B->ops->multtranspose)(a->B,xx,a->lvec);CHKERRQ(ierr);
989   ierr = VecScatterBegin(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
990   ierr = VecScatterEnd(a->lvec,zz,ADD_VALUES,SCATTER_REVERSE,a->Mvctx);CHKERRQ(ierr);
991 
992   PetscFunctionReturn(0);
993 }
994 
995 /*
996   This only works correctly for square matrices where the subblock A->A is the
997    diagonal block
998 */
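/*
   Illustrative usage sketch (assumes a square matrix; "diag" is any vector that conforms
   to the row layout of A, e.g. obtained with MatGetVecs()):

     ierr = MatGetVecs(A,PETSC_NULL,&diag);CHKERRQ(ierr);
     ierr = MatGetDiagonal(A,diag);CHKERRQ(ierr);
*/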
999 #undef __FUNCT__
1000 #define __FUNCT__ "MatGetDiagonal_MPISBAIJ"
1001 PetscErrorCode MatGetDiagonal_MPISBAIJ(Mat A,Vec v)
1002 {
1003   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1004   PetscErrorCode ierr;
1005 
1006   PetscFunctionBegin;
1007   /* if (a->rmap.N != a->cmap.N) SETERRQ(PETSC_ERR_SUP,"Supports only square matrix where A->A is diag block"); */
1008   ierr = MatGetDiagonal(a->A,v);CHKERRQ(ierr);
1009   PetscFunctionReturn(0);
1010 }
1011 
1012 #undef __FUNCT__
1013 #define __FUNCT__ "MatScale_MPISBAIJ"
1014 PetscErrorCode MatScale_MPISBAIJ(Mat A,PetscScalar aa)
1015 {
1016   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1017   PetscErrorCode ierr;
1018 
1019   PetscFunctionBegin;
1020   ierr = MatScale(a->A,aa);CHKERRQ(ierr);
1021   ierr = MatScale(a->B,aa);CHKERRQ(ierr);
1022   PetscFunctionReturn(0);
1023 }
1024 
1025 #undef __FUNCT__
1026 #define __FUNCT__ "MatGetRow_MPISBAIJ"
1027 PetscErrorCode MatGetRow_MPISBAIJ(Mat matin,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1028 {
1029   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
1030   PetscScalar    *vworkA,*vworkB,**pvA,**pvB,*v_p;
1031   PetscErrorCode ierr;
1032   PetscInt       bs = matin->rmap.bs,bs2 = mat->bs2,i,*cworkA,*cworkB,**pcA,**pcB;
1033   PetscInt       nztot,nzA,nzB,lrow,brstart = matin->rmap.rstart,brend = matin->rmap.rend;
1034   PetscInt       *cmap,*idx_p,cstart = mat->rstartbs;
1035 
1036   PetscFunctionBegin;
1037   if (mat->getrowactive == PETSC_TRUE) SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"Already active");
1038   mat->getrowactive = PETSC_TRUE;
1039 
1040   if (!mat->rowvalues && (idx || v)) {
1041     /*
1042         allocate enough space to hold information from the longest row.
1043     */
1044     Mat_SeqSBAIJ *Aa = (Mat_SeqSBAIJ*)mat->A->data;
1045     Mat_SeqBAIJ  *Ba = (Mat_SeqBAIJ*)mat->B->data;
1046     PetscInt     max = 1,mbs = mat->mbs,tmp;
1047     for (i=0; i<mbs; i++) {
1048       tmp = Aa->i[i+1] - Aa->i[i] + Ba->i[i+1] - Ba->i[i]; /* row length */
1049       if (max < tmp) { max = tmp; }
1050     }
1051     ierr = PetscMalloc(max*bs2*(sizeof(PetscInt)+sizeof(PetscScalar)),&mat->rowvalues);CHKERRQ(ierr);
1052     mat->rowindices = (PetscInt*)(mat->rowvalues + max*bs2);
1053   }
1054 
1055   if (row < brstart || row >= brend) SETERRQ(PETSC_ERR_SUP,"Only local rows")
1056   lrow = row - brstart;  /* local row index */
1057 
1058   pvA = &vworkA; pcA = &cworkA; pvB = &vworkB; pcB = &cworkB;
1059   if (!v)   {pvA = 0; pvB = 0;}
1060   if (!idx) {pcA = 0; if (!v) pcB = 0;}
1061   ierr = (*mat->A->ops->getrow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1062   ierr = (*mat->B->ops->getrow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1063   nztot = nzA + nzB;
1064 
1065   cmap  = mat->garray;
1066   if (v  || idx) {
1067     if (nztot) {
1068       /* Sort by increasing column numbers, assuming A and B already sorted */
1069       PetscInt imark = -1;
1070       if (v) {
1071         *v = v_p = mat->rowvalues;
1072         for (i=0; i<nzB; i++) {
1073           if (cmap[cworkB[i]/bs] < cstart)   v_p[i] = vworkB[i];
1074           else break;
1075         }
1076         imark = i;
1077         for (i=0; i<nzA; i++)     v_p[imark+i] = vworkA[i];
1078         for (i=imark; i<nzB; i++) v_p[nzA+i]   = vworkB[i];
1079       }
1080       if (idx) {
1081         *idx = idx_p = mat->rowindices;
1082         if (imark > -1) {
1083           for (i=0; i<imark; i++) {
1084             idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs;
1085           }
1086         } else {
1087           for (i=0; i<nzB; i++) {
1088             if (cmap[cworkB[i]/bs] < cstart)
1089               idx_p[i] = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1090             else break;
1091           }
1092           imark = i;
1093         }
1094         for (i=0; i<nzA; i++)     idx_p[imark+i] = cstart*bs + cworkA[i];
1095         for (i=imark; i<nzB; i++) idx_p[nzA+i]   = cmap[cworkB[i]/bs]*bs + cworkB[i]%bs ;
1096       }
1097     } else {
1098       if (idx) *idx = 0;
1099       if (v)   *v   = 0;
1100     }
1101   }
1102   *nz = nztot;
1103   ierr = (*mat->A->ops->restorerow)(mat->A,lrow,&nzA,pcA,pvA);CHKERRQ(ierr);
1104   ierr = (*mat->B->ops->restorerow)(mat->B,lrow,&nzB,pcB,pvB);CHKERRQ(ierr);
1105   PetscFunctionReturn(0);
1106 }
1107 
1108 #undef __FUNCT__
1109 #define __FUNCT__ "MatRestoreRow_MPISBAIJ"
1110 PetscErrorCode MatRestoreRow_MPISBAIJ(Mat mat,PetscInt row,PetscInt *nz,PetscInt **idx,PetscScalar **v)
1111 {
1112   Mat_MPISBAIJ *baij = (Mat_MPISBAIJ*)mat->data;
1113 
1114   PetscFunctionBegin;
1115   if (!baij->getrowactive) {
1116     SETERRQ(PETSC_ERR_ARG_WRONGSTATE,"MatGetRow() must be called first");
1117   }
1118   baij->getrowactive = PETSC_FALSE;
1119   PetscFunctionReturn(0);
1120 }
1121 
1122 #undef __FUNCT__
1123 #define __FUNCT__ "MatGetRowUpperTriangular_MPISBAIJ"
1124 PetscErrorCode MatGetRowUpperTriangular_MPISBAIJ(Mat A)
1125 {
1126   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1127   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1128 
1129   PetscFunctionBegin;
1130   aA->getrow_utriangular = PETSC_TRUE;
1131   PetscFunctionReturn(0);
1132 }
1133 #undef __FUNCT__
1134 #define __FUNCT__ "MatRestoreRowUpperTriangular_MPISBAIJ"
1135 PetscErrorCode MatRestoreRowUpperTriangular_MPISBAIJ(Mat A)
1136 {
1137   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1138   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1139 
1140   PetscFunctionBegin;
1141   aA->getrow_utriangular = PETSC_FALSE;
1142   PetscFunctionReturn(0);
1143 }
1144 
1145 #undef __FUNCT__
1146 #define __FUNCT__ "MatRealPart_MPISBAIJ"
1147 PetscErrorCode MatRealPart_MPISBAIJ(Mat A)
1148 {
1149   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1150   PetscErrorCode ierr;
1151 
1152   PetscFunctionBegin;
1153   ierr = MatRealPart(a->A);CHKERRQ(ierr);
1154   ierr = MatRealPart(a->B);CHKERRQ(ierr);
1155   PetscFunctionReturn(0);
1156 }
1157 
1158 #undef __FUNCT__
1159 #define __FUNCT__ "MatImaginaryPart_MPISBAIJ"
1160 PetscErrorCode MatImaginaryPart_MPISBAIJ(Mat A)
1161 {
1162   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1163   PetscErrorCode ierr;
1164 
1165   PetscFunctionBegin;
1166   ierr = MatImaginaryPart(a->A);CHKERRQ(ierr);
1167   ierr = MatImaginaryPart(a->B);CHKERRQ(ierr);
1168   PetscFunctionReturn(0);
1169 }
1170 
1171 #undef __FUNCT__
1172 #define __FUNCT__ "MatZeroEntries_MPISBAIJ"
1173 PetscErrorCode MatZeroEntries_MPISBAIJ(Mat A)
1174 {
1175   Mat_MPISBAIJ   *l = (Mat_MPISBAIJ*)A->data;
1176   PetscErrorCode ierr;
1177 
1178   PetscFunctionBegin;
1179   ierr = MatZeroEntries(l->A);CHKERRQ(ierr);
1180   ierr = MatZeroEntries(l->B);CHKERRQ(ierr);
1181   PetscFunctionReturn(0);
1182 }
1183 
1184 #undef __FUNCT__
1185 #define __FUNCT__ "MatGetInfo_MPISBAIJ"
1186 PetscErrorCode MatGetInfo_MPISBAIJ(Mat matin,MatInfoType flag,MatInfo *info)
1187 {
1188   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)matin->data;
1189   Mat            A = a->A,B = a->B;
1190   PetscErrorCode ierr;
1191   PetscReal      isend[5],irecv[5];
1192 
1193   PetscFunctionBegin;
1194   info->block_size     = (PetscReal)matin->rmap.bs;
1195   ierr = MatGetInfo(A,MAT_LOCAL,info);CHKERRQ(ierr);
1196   isend[0] = info->nz_used; isend[1] = info->nz_allocated; isend[2] = info->nz_unneeded;
1197   isend[3] = info->memory;  isend[4] = info->mallocs;
1198   ierr = MatGetInfo(B,MAT_LOCAL,info);CHKERRQ(ierr);
1199   isend[0] += info->nz_used; isend[1] += info->nz_allocated; isend[2] += info->nz_unneeded;
1200   isend[3] += info->memory;  isend[4] += info->mallocs;
1201   if (flag == MAT_LOCAL) {
1202     info->nz_used      = isend[0];
1203     info->nz_allocated = isend[1];
1204     info->nz_unneeded  = isend[2];
1205     info->memory       = isend[3];
1206     info->mallocs      = isend[4];
1207   } else if (flag == MAT_GLOBAL_MAX) {
1208     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_MAX,matin->comm);CHKERRQ(ierr);
1209     info->nz_used      = irecv[0];
1210     info->nz_allocated = irecv[1];
1211     info->nz_unneeded  = irecv[2];
1212     info->memory       = irecv[3];
1213     info->mallocs      = irecv[4];
1214   } else if (flag == MAT_GLOBAL_SUM) {
1215     ierr = MPI_Allreduce(isend,irecv,5,MPIU_REAL,MPI_SUM,matin->comm);CHKERRQ(ierr);
1216     info->nz_used      = irecv[0];
1217     info->nz_allocated = irecv[1];
1218     info->nz_unneeded  = irecv[2];
1219     info->memory       = irecv[3];
1220     info->mallocs      = irecv[4];
1221   } else {
1222     SETERRQ1(PETSC_ERR_ARG_WRONG,"Unknown MatInfoType argument %d",(int)flag);
1223   }
1224   info->rows_global       = (PetscReal)A->rmap.N;
1225   info->columns_global    = (PetscReal)A->cmap.N;
1226   info->rows_local        = (PetscReal)A->rmap.N;
1227   info->columns_local     = (PetscReal)A->cmap.N;
1228   info->fill_ratio_given  = 0; /* no parallel LU/ILU/Cholesky */
1229   info->fill_ratio_needed = 0;
1230   info->factor_mallocs    = 0;
1231   PetscFunctionReturn(0);
1232 }
1233 
1234 #undef __FUNCT__
1235 #define __FUNCT__ "MatSetOption_MPISBAIJ"
1236 PetscErrorCode MatSetOption_MPISBAIJ(Mat A,MatOption op)
1237 {
1238   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1239   Mat_SeqSBAIJ   *aA = (Mat_SeqSBAIJ*)a->A->data;
1240   PetscErrorCode ierr;
1241 
1242   PetscFunctionBegin;
1243   switch (op) {
1244   case MAT_NO_NEW_NONZERO_LOCATIONS:
1245   case MAT_YES_NEW_NONZERO_LOCATIONS:
1246   case MAT_COLUMNS_UNSORTED:
1247   case MAT_COLUMNS_SORTED:
1248   case MAT_NEW_NONZERO_ALLOCATION_ERR:
1249   case MAT_KEEP_ZEROED_ROWS:
1250   case MAT_NEW_NONZERO_LOCATION_ERR:
1251     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1252     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1253     break;
1254   case MAT_ROW_ORIENTED:
1255     a->roworiented = PETSC_TRUE;
1256     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1257     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1258     break;
1259   case MAT_ROWS_SORTED:
1260   case MAT_ROWS_UNSORTED:
1261   case MAT_YES_NEW_DIAGONALS:
1262     ierr = PetscInfo1(A,"Option %d ignored\n",op);CHKERRQ(ierr);
1263     break;
1264   case MAT_COLUMN_ORIENTED:
1265     a->roworiented = PETSC_FALSE;
1266     ierr = MatSetOption(a->A,op);CHKERRQ(ierr);
1267     ierr = MatSetOption(a->B,op);CHKERRQ(ierr);
1268     break;
1269   case MAT_IGNORE_OFF_PROC_ENTRIES:
1270     a->donotstash = PETSC_TRUE;
1271     break;
1272   case MAT_NO_NEW_DIAGONALS:
1273     SETERRQ(PETSC_ERR_SUP,"MAT_NO_NEW_DIAGONALS");
1274   case MAT_USE_HASH_TABLE:
1275     a->ht_flag = PETSC_TRUE;
1276     break;
1277   case MAT_NOT_SYMMETRIC:
1278   case MAT_NOT_STRUCTURALLY_SYMMETRIC:
1279   case MAT_HERMITIAN:
1280     SETERRQ(PETSC_ERR_SUP,"Matrix must be symmetric");
1281   case MAT_SYMMETRIC:
1282   case MAT_STRUCTURALLY_SYMMETRIC:
1283   case MAT_NOT_HERMITIAN:
1284   case MAT_SYMMETRY_ETERNAL:
1285   case MAT_NOT_SYMMETRY_ETERNAL:
1286     break;
1287   case MAT_IGNORE_LOWER_TRIANGULAR:
1288     aA->ignore_ltriangular = PETSC_TRUE;
1289     break;
1290   case MAT_ERROR_LOWER_TRIANGULAR:
1291     aA->ignore_ltriangular = PETSC_FALSE;
1292     break;
1293   case MAT_GETROW_UPPERTRIANGULAR:
1294     aA->getrow_utriangular = PETSC_TRUE;
1295     break;
1296   default:
1297     SETERRQ1(PETSC_ERR_SUP,"unknown option %d",op);
1298   }
1299   PetscFunctionReturn(0);
1300 }
1301 
1302 #undef __FUNCT__
1303 #define __FUNCT__ "MatTranspose_MPISBAIJ"
1304 PetscErrorCode MatTranspose_MPISBAIJ(Mat A,Mat *B)
1305 {
1306   PetscErrorCode ierr;
1307   PetscFunctionBegin;
1308   ierr = MatDuplicate(A,MAT_COPY_VALUES,B);CHKERRQ(ierr);
1309   PetscFunctionReturn(0);
1310 }
1311 
1312 #undef __FUNCT__
1313 #define __FUNCT__ "MatDiagonalScale_MPISBAIJ"
1314 PetscErrorCode MatDiagonalScale_MPISBAIJ(Mat mat,Vec ll,Vec rr)
1315 {
1316   Mat_MPISBAIJ   *baij = (Mat_MPISBAIJ*)mat->data;
1317   Mat            a=baij->A, b=baij->B;
1318   PetscErrorCode ierr;
1319   PetscInt       nv,m,n;
1320   PetscTruth     flg;
1321 
1322   PetscFunctionBegin;
1323   if (ll != rr){
1324     ierr = VecEqual(ll,rr,&flg);CHKERRQ(ierr);
1325     if (!flg)
1326       SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"For symmetric format, left and right scaling vectors must be the same");
1327   }
1328   if (!ll) PetscFunctionReturn(0);
1329 
1330   ierr = MatGetLocalSize(mat,&m,&n);CHKERRQ(ierr);
1331   if (m != n) SETERRQ2(PETSC_ERR_ARG_SIZ,"For symmetric format, local sizes %D and %D must be the same",m,n);
1332 
1333   ierr = VecGetLocalSize(rr,&nv);CHKERRQ(ierr);
1334   if (nv!=n) SETERRQ(PETSC_ERR_ARG_SIZ,"Left and right vectors have non-conforming local size");
1335 
1336   ierr = VecScatterBegin(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1337 
1338   /* left diagonalscale the off-diagonal part */
1339   ierr = (*b->ops->diagonalscale)(b,ll,PETSC_NULL);CHKERRQ(ierr);
1340 
1341   /* scale the diagonal part */
1342   ierr = (*a->ops->diagonalscale)(a,ll,rr);CHKERRQ(ierr);
1343 
1344   /* right diagonalscale the off-diagonal part */
1345   ierr = VecScatterEnd(rr,baij->lvec,INSERT_VALUES,SCATTER_FORWARD,baij->Mvctx);CHKERRQ(ierr);
1346   ierr = (*b->ops->diagonalscale)(b,PETSC_NULL,baij->lvec);CHKERRQ(ierr);
1347   PetscFunctionReturn(0);
1348 }
1349 
1350 #undef __FUNCT__
1351 #define __FUNCT__ "MatPrintHelp_MPISBAIJ"
1352 PetscErrorCode MatPrintHelp_MPISBAIJ(Mat A)
1353 {
1354   Mat_MPISBAIJ      *a = (Mat_MPISBAIJ*)A->data;
1355   MPI_Comm          comm = A->comm;
1356   static PetscTruth called = PETSC_FALSE;
1357   PetscErrorCode    ierr;
1358 
1359   PetscFunctionBegin;
1360   if (!a->rank) {
1361     ierr = MatPrintHelp_SeqSBAIJ(a->A);CHKERRQ(ierr);
1362   }
1363   if (called) {PetscFunctionReturn(0);} else called = PETSC_TRUE;
1364   ierr = (*PetscHelpPrintf)(comm," Options for MATMPISBAIJ matrix format (the defaults):\n");CHKERRQ(ierr);
1365   ierr = (*PetscHelpPrintf)(comm,"  -mat_use_hash_table <factor>: Use hashtable for efficient matrix assembly\n");CHKERRQ(ierr);
1366   PetscFunctionReturn(0);
1367 }
1368 
1369 #undef __FUNCT__
1370 #define __FUNCT__ "MatSetUnfactored_MPISBAIJ"
1371 PetscErrorCode MatSetUnfactored_MPISBAIJ(Mat A)
1372 {
1373   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
1374   PetscErrorCode ierr;
1375 
1376   PetscFunctionBegin;
1377   ierr = MatSetUnfactored(a->A);CHKERRQ(ierr);
1378   PetscFunctionReturn(0);
1379 }
1380 
1381 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat,MatDuplicateOption,Mat *);
1382 
1383 #undef __FUNCT__
1384 #define __FUNCT__ "MatEqual_MPISBAIJ"
1385 PetscErrorCode MatEqual_MPISBAIJ(Mat A,Mat B,PetscTruth *flag)
1386 {
1387   Mat_MPISBAIJ   *matB = (Mat_MPISBAIJ*)B->data,*matA = (Mat_MPISBAIJ*)A->data;
1388   Mat            a,b,c,d;
1389   PetscTruth     flg;
1390   PetscErrorCode ierr;
1391 
1392   PetscFunctionBegin;
1393   a = matA->A; b = matA->B;
1394   c = matB->A; d = matB->B;
1395 
1396   ierr = MatEqual(a,c,&flg);CHKERRQ(ierr);
1397   if (flg) {
1398     ierr = MatEqual(b,d,&flg);CHKERRQ(ierr);
1399   }
1400   ierr = MPI_Allreduce(&flg,flag,1,MPI_INT,MPI_LAND,A->comm);CHKERRQ(ierr);
1401   PetscFunctionReturn(0);
1402 }
1403 
1404 #undef __FUNCT__
1405 #define __FUNCT__ "MatCopy_MPISBAIJ"
1406 PetscErrorCode MatCopy_MPISBAIJ(Mat A,Mat B,MatStructure str)
1407 {
1408   PetscErrorCode ierr;
1409   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ *)A->data;
1410   Mat_MPISBAIJ   *b = (Mat_MPISBAIJ *)B->data;
1411 
1412   PetscFunctionBegin;
1413   /* If the two matrices don't have the same copy implementation, they aren't compatible for fast copy. */
1414   if ((str != SAME_NONZERO_PATTERN) || (A->ops->copy != B->ops->copy)) {
1415     ierr = MatGetRowUpperTriangular(A);CHKERRQ(ierr);
1416     ierr = MatCopy_Basic(A,B,str);CHKERRQ(ierr);
1417     ierr = MatRestoreRowUpperTriangular(A);CHKERRQ(ierr);
1418   } else {
1419     ierr = MatCopy(a->A,b->A,str);CHKERRQ(ierr);
1420     ierr = MatCopy(a->B,b->B,str);CHKERRQ(ierr);
1421   }
1422   PetscFunctionReturn(0);
1423 }
1424 
1425 #undef __FUNCT__
1426 #define __FUNCT__ "MatSetUpPreallocation_MPISBAIJ"
1427 PetscErrorCode MatSetUpPreallocation_MPISBAIJ(Mat A)
1428 {
1429   PetscErrorCode ierr;
1430 
1431   PetscFunctionBegin;
1432   ierr = MatMPISBAIJSetPreallocation(A,PetscMax(A->rmap.bs,1),PETSC_DEFAULT,0,PETSC_DEFAULT,0);CHKERRQ(ierr);
1433   PetscFunctionReturn(0);
1434 }
1435 
1436 #include "petscblaslapack.h"
1437 #undef __FUNCT__
1438 #define __FUNCT__ "MatAXPY_MPISBAIJ"
1439 PetscErrorCode MatAXPY_MPISBAIJ(Mat Y,PetscScalar a,Mat X,MatStructure str)
1440 {
1441   PetscErrorCode ierr;
1442   Mat_MPISBAIJ   *xx=(Mat_MPISBAIJ *)X->data,*yy=(Mat_MPISBAIJ *)Y->data;
1443   PetscBLASInt   bnz,one=1;
1444   Mat_SeqSBAIJ   *xa,*ya;
1445   Mat_SeqBAIJ    *xb,*yb;
1446 
1447   PetscFunctionBegin;
1448   if (str == SAME_NONZERO_PATTERN) {
1449     PetscScalar alpha = a;
1450     xa = (Mat_SeqSBAIJ *)xx->A->data;
1451     ya = (Mat_SeqSBAIJ *)yy->A->data;
1452     bnz = (PetscBLASInt)xa->nz;
1453     BLASaxpy_(&bnz,&alpha,xa->a,&one,ya->a,&one);
1454     xb = (Mat_SeqBAIJ *)xx->B->data;
1455     yb = (Mat_SeqBAIJ *)yy->B->data;
1456     bnz = (PetscBLASInt)xb->nz;
1457     BLASaxpy_(&bnz,&alpha,xb->a,&one,yb->a,&one);
1458   } else {
1459     ierr = MatGetRowUpperTriangular(X);CHKERRQ(ierr);
1460     ierr = MatAXPY_Basic(Y,a,X,str);CHKERRQ(ierr);
1461     ierr = MatRestoreRowUpperTriangular(X);CHKERRQ(ierr);
1462   }
1463   PetscFunctionReturn(0);
1464 }
1465 
1466 #undef __FUNCT__
1467 #define __FUNCT__ "MatGetSubMatrices_MPISBAIJ"
1468 PetscErrorCode MatGetSubMatrices_MPISBAIJ(Mat A,PetscInt n,const IS irow[],const IS icol[],MatReuse scall,Mat *B[])
1469 {
1470   PetscErrorCode ierr;
1471   PetscInt       i;
1472   PetscTruth     flg;
1473 
1474   PetscFunctionBegin;
1475   for (i=0; i<n; i++) {
1476     ierr = ISEqual(irow[i],icol[i],&flg);CHKERRQ(ierr);
1477     if (!flg) {
1478       SETERRQ(PETSC_ERR_SUP,"Can only get symmetric submatrix for MPISBAIJ matrices");
1479     }
1480   }
1481   ierr = MatGetSubMatrices_MPIBAIJ(A,n,irow,icol,scall,B);CHKERRQ(ierr);
1482   PetscFunctionReturn(0);
1483 }
1484 
1485 
1486 /* -------------------------------------------------------------------*/
1487 static struct _MatOps MatOps_Values = {
1488        MatSetValues_MPISBAIJ,
1489        MatGetRow_MPISBAIJ,
1490        MatRestoreRow_MPISBAIJ,
1491        MatMult_MPISBAIJ,
1492 /* 4*/ MatMultAdd_MPISBAIJ,
1493        MatMult_MPISBAIJ,       /* transpose versions are same as non-transpose */
1494        MatMultAdd_MPISBAIJ,
1495        0,
1496        0,
1497        0,
1498 /*10*/ 0,
1499        0,
1500        0,
1501        MatRelax_MPISBAIJ,
1502        MatTranspose_MPISBAIJ,
1503 /*15*/ MatGetInfo_MPISBAIJ,
1504        MatEqual_MPISBAIJ,
1505        MatGetDiagonal_MPISBAIJ,
1506        MatDiagonalScale_MPISBAIJ,
1507        MatNorm_MPISBAIJ,
1508 /*20*/ MatAssemblyBegin_MPISBAIJ,
1509        MatAssemblyEnd_MPISBAIJ,
1510        0,
1511        MatSetOption_MPISBAIJ,
1512        MatZeroEntries_MPISBAIJ,
1513 /*25*/ 0,
1514        0,
1515        0,
1516        0,
1517        0,
1518 /*30*/ MatSetUpPreallocation_MPISBAIJ,
1519        0,
1520        0,
1521        0,
1522        0,
1523 /*35*/ MatDuplicate_MPISBAIJ,
1524        0,
1525        0,
1526        0,
1527        0,
1528 /*40*/ MatAXPY_MPISBAIJ,
1529        MatGetSubMatrices_MPISBAIJ,
1530        MatIncreaseOverlap_MPISBAIJ,
1531        MatGetValues_MPISBAIJ,
1532        MatCopy_MPISBAIJ,
1533 /*45*/ MatPrintHelp_MPISBAIJ,
1534        MatScale_MPISBAIJ,
1535        0,
1536        0,
1537        0,
1538 /*50*/ 0,
1539        0,
1540        0,
1541        0,
1542        0,
1543 /*55*/ 0,
1544        0,
1545        MatSetUnfactored_MPISBAIJ,
1546        0,
1547        MatSetValuesBlocked_MPISBAIJ,
1548 /*60*/ 0,
1549        0,
1550        0,
1551        0,
1552        0,
1553 /*65*/ 0,
1554        0,
1555        0,
1556        0,
1557        0,
1558 /*70*/ MatGetRowMax_MPISBAIJ,
1559        0,
1560        0,
1561        0,
1562        0,
1563 /*75*/ 0,
1564        0,
1565        0,
1566        0,
1567        0,
1568 /*80*/ 0,
1569        0,
1570        0,
1571        0,
1572        MatLoad_MPISBAIJ,
1573 /*85*/ 0,
1574        0,
1575        0,
1576        0,
1577        0,
1578 /*90*/ 0,
1579        0,
1580        0,
1581        0,
1582        0,
1583 /*95*/ 0,
1584        0,
1585        0,
1586        0,
1587        0,
1588 /*100*/0,
1589        0,
1590        0,
1591        0,
1592        0,
1593 /*105*/0,
1594        MatRealPart_MPISBAIJ,
1595        MatImaginaryPart_MPISBAIJ,
1596        MatGetRowUpperTriangular_MPISBAIJ,
1597        MatRestoreRowUpperTriangular_MPISBAIJ
1598 };
1599 
1600 
1601 EXTERN_C_BEGIN
1602 #undef __FUNCT__
1603 #define __FUNCT__ "MatGetDiagonalBlock_MPISBAIJ"
1604 PetscErrorCode PETSCMAT_DLLEXPORT MatGetDiagonalBlock_MPISBAIJ(Mat A,PetscTruth *iscopy,MatReuse reuse,Mat *a)
1605 {
1606   PetscFunctionBegin;
1607   *a      = ((Mat_MPISBAIJ *)A->data)->A;
1608   *iscopy = PETSC_FALSE;
1609   PetscFunctionReturn(0);
1610 }
1611 EXTERN_C_END
1612 
1613 EXTERN_C_BEGIN
1614 #undef __FUNCT__
1615 #define __FUNCT__ "MatMPISBAIJSetPreallocation_MPISBAIJ"
1616 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation_MPISBAIJ(Mat B,PetscInt bs,PetscInt d_nz,PetscInt *d_nnz,PetscInt o_nz,PetscInt *o_nnz)
1617 {
1618   Mat_MPISBAIJ   *b;
1619   PetscErrorCode ierr;
1620   PetscInt       i,mbs,Mbs;
1621 
1622   PetscFunctionBegin;
1623   ierr = PetscOptionsGetInt(B->prefix,"-mat_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
1624 
1625   if (bs < 1) SETERRQ(PETSC_ERR_ARG_OUTOFRANGE,"Invalid block size specified, must be positive");
1626   if (d_nz == PETSC_DECIDE || d_nz == PETSC_DEFAULT) d_nz = 3;
1627   if (o_nz == PETSC_DECIDE || o_nz == PETSC_DEFAULT) o_nz = 1;
1628   if (d_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"d_nz cannot be less than 0: value %D",d_nz);
1629   if (o_nz < 0) SETERRQ1(PETSC_ERR_ARG_OUTOFRANGE,"o_nz cannot be less than 0: value %D",o_nz);
1630 
1631   B->rmap.bs = B->cmap.bs = bs;
1632   ierr = PetscMapInitialize(B->comm,&B->rmap);CHKERRQ(ierr);
1633   ierr = PetscMapInitialize(B->comm,&B->cmap);CHKERRQ(ierr);
1634 
1635   if (d_nnz) {
1636     for (i=0; i<B->rmap.n/bs; i++) {
1637       if (d_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"d_nnz cannot be less than 0: local row %D value %D",i,d_nnz[i]);
1638     }
1639   }
1640   if (o_nnz) {
1641     for (i=0; i<B->rmap.n/bs; i++) {
1642       if (o_nnz[i] < 0) SETERRQ2(PETSC_ERR_ARG_OUTOFRANGE,"o_nnz cannot be less than 0: local row %D value %D",i,o_nnz[i]);
1643     }
1644   }
1645   B->preallocated = PETSC_TRUE;
1646 
1647   b   = (Mat_MPISBAIJ*)B->data;
1648   mbs = B->rmap.n/bs;
1649   Mbs = B->rmap.N/bs;
1650   if (mbs*bs != B->rmap.n) {
1651     SETERRQ2(PETSC_ERR_ARG_SIZ,"No of local rows %D must be divisible by blocksize %D",B->rmap.N,bs);
1652   }
1653 
1654   B->rmap.bs  = bs;
1655   b->bs2 = bs*bs;
1656   b->mbs = mbs;
1657   b->nbs = mbs;
1658   b->Mbs = Mbs;
1659   b->Nbs = Mbs;
1660 
1661   for (i=0; i<=b->size; i++) {
1662     b->rangebs[i] = B->rmap.range[i]/bs;
1663   }
1664   b->rstartbs = B->rmap.rstart/bs;
1665   b->rendbs   = B->rmap.rend/bs;
1666 
1667   b->cstartbs = B->cmap.rstart/bs;
1668   b->cendbs   = B->cmap.rend/bs;
1669 
1670   ierr = MatCreate(PETSC_COMM_SELF,&b->A);CHKERRQ(ierr);
1671   ierr = MatSetSizes(b->A,B->rmap.n,B->cmap.n,B->rmap.n,B->cmap.n);CHKERRQ(ierr);
1672   ierr = MatSetType(b->A,MATSEQSBAIJ);CHKERRQ(ierr);
1673   ierr = MatSeqSBAIJSetPreallocation(b->A,bs,d_nz,d_nnz);CHKERRQ(ierr);
1674   ierr = PetscLogObjectParent(B,b->A);CHKERRQ(ierr);
1675 
1676   ierr = MatCreate(PETSC_COMM_SELF,&b->B);CHKERRQ(ierr);
1677   ierr = MatSetSizes(b->B,B->rmap.n,B->cmap.N,B->rmap.n,B->cmap.N);CHKERRQ(ierr);
1678   ierr = MatSetType(b->B,MATSEQBAIJ);CHKERRQ(ierr);
1679   ierr = MatSeqBAIJSetPreallocation(b->B,bs,o_nz,o_nnz);CHKERRQ(ierr);
1680   ierr = PetscLogObjectParent(B,b->B);CHKERRQ(ierr);
1681 
1682   /* build stash to cache entries destined for other processes during assembly */
1683   ierr = MatStashCreate_Private(B->comm,bs,&B->bstash);CHKERRQ(ierr);
1684 
1685   PetscFunctionReturn(0);
1686 }
1687 EXTERN_C_END
1688 
1689 /*MC
1690    MATMPISBAIJ - MATMPISBAIJ = "mpisbaij" - A matrix type to be used for distributed symmetric sparse block matrices,
1691    based on block compressed sparse row format.  Only the upper triangular portion of the matrix is stored.
1692 
1693    Options Database Keys:
1694 . -mat_type mpisbaij - sets the matrix type to "mpisbaij" during a call to MatSetFromOptions()
1695 
1696   Level: beginner
1697 
1698 .seealso: MatCreateMPISBAIJ
1699 M*/
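
/*
   Illustrative sketch (not part of the original source): one plausible way to obtain a
   matrix of this type through the generic Mat interface.  The sizes and preallocation
   counts below are assumptions for the example only.

      Mat  A;
      ierr = MatCreate(comm,&A);CHKERRQ(ierr);
      ierr = MatSetSizes(A,PETSC_DECIDE,PETSC_DECIDE,100,100);CHKERRQ(ierr);
      ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
      ierr = MatSetFromOptions(A);CHKERRQ(ierr);
      ierr = MatMPISBAIJSetPreallocation(A,2,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
*/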
1700 
1701 EXTERN_C_BEGIN
1702 #undef __FUNCT__
1703 #define __FUNCT__ "MatCreate_MPISBAIJ"
1704 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_MPISBAIJ(Mat B)
1705 {
1706   Mat_MPISBAIJ   *b;
1707   PetscErrorCode ierr;
1708   PetscTruth     flg;
1709 
1710   PetscFunctionBegin;
1711 
1712   ierr    = PetscNew(Mat_MPISBAIJ,&b);CHKERRQ(ierr);
1713   B->data = (void*)b;
1714   ierr    = PetscMemcpy(B->ops,&MatOps_Values,sizeof(struct _MatOps));CHKERRQ(ierr);
1715 
1716   B->ops->destroy    = MatDestroy_MPISBAIJ;
1717   B->ops->view       = MatView_MPISBAIJ;
1718   B->mapping    = 0;
1719   B->factor     = 0;
1720   B->assembled  = PETSC_FALSE;
1721 
1722   B->insertmode = NOT_SET_VALUES;
1723   ierr = MPI_Comm_rank(B->comm,&b->rank);CHKERRQ(ierr);
1724   ierr = MPI_Comm_size(B->comm,&b->size);CHKERRQ(ierr);
1725 
1726   /* build local table of row and column ownerships */
1727   ierr  = PetscMalloc((b->size+2)*sizeof(PetscInt),&b->rangebs);CHKERRQ(ierr);
1728 
1729   /* build stash to cache entries destined for other processes during assembly */
1730   ierr = MatStashCreate_Private(B->comm,1,&B->stash);CHKERRQ(ierr);
1731   b->donotstash  = PETSC_FALSE;
1732   b->colmap      = PETSC_NULL;
1733   b->garray      = PETSC_NULL;
1734   b->roworiented = PETSC_TRUE;
1735 
1736 #if defined(PETSC_USE_MAT_SINGLE)
1737   /* stuff for MatSetValues_XXX in single precision */
1738   b->setvalueslen     = 0;
1739   b->setvaluescopy    = PETSC_NULL;
1740 #endif
1741 
1742   /* stuff used in block assembly */
1743   b->barray       = 0;
1744 
1745   /* stuff used for matrix vector multiply */
1746   b->lvec         = 0;
1747   b->Mvctx        = 0;
1748   b->slvec0       = 0;
1749   b->slvec0b      = 0;
1750   b->slvec1       = 0;
1751   b->slvec1a      = 0;
1752   b->slvec1b      = 0;
1753   b->sMvctx       = 0;
1754 
1755   /* stuff for MatGetRow() */
1756   b->rowindices   = 0;
1757   b->rowvalues    = 0;
1758   b->getrowactive = PETSC_FALSE;
1759 
1760   /* hash table stuff */
1761   b->ht           = 0;
1762   b->hd           = 0;
1763   b->ht_size      = 0;
1764   b->ht_flag      = PETSC_FALSE;
1765   b->ht_fact      = 0;
1766   b->ht_total_ct  = 0;
1767   b->ht_insert_ct = 0;
1768 
1769   b->in_loc       = 0;
1770   b->v_loc        = 0;
1771   b->n_loc        = 0;
1772   ierr = PetscOptionsHasName(B->prefix,"-mat_use_hash_table",&flg);CHKERRQ(ierr);
1773   if (flg) {
1774     PetscReal fact = 1.39;
1775     ierr = MatSetOption(B,MAT_USE_HASH_TABLE);CHKERRQ(ierr);
1776     ierr = PetscOptionsGetReal(B->prefix,"-mat_use_hash_table",&fact,PETSC_NULL);CHKERRQ(ierr);
1777     if (fact <= 1.0) fact = 1.39;
1778     ierr = MatMPIBAIJSetHashTableFactor(B,fact);CHKERRQ(ierr);
1779     ierr = PetscInfo1(0,"Hash table Factor used %5.2f\n",fact);CHKERRQ(ierr);
1780   }
1781   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatStoreValues_C",
1782                                      "MatStoreValues_MPISBAIJ",
1783                                      MatStoreValues_MPISBAIJ);CHKERRQ(ierr);
1784   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatRetrieveValues_C",
1785                                      "MatRetrieveValues_MPISBAIJ",
1786                                      MatRetrieveValues_MPISBAIJ);CHKERRQ(ierr);
1787   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatGetDiagonalBlock_C",
1788                                      "MatGetDiagonalBlock_MPISBAIJ",
1789                                      MatGetDiagonalBlock_MPISBAIJ);CHKERRQ(ierr);
1790   ierr = PetscObjectComposeFunctionDynamic((PetscObject)B,"MatMPISBAIJSetPreallocation_C",
1791                                      "MatMPISBAIJSetPreallocation_MPISBAIJ",
1792                                      MatMPISBAIJSetPreallocation_MPISBAIJ);CHKERRQ(ierr);
1793   B->symmetric                  = PETSC_TRUE;
1794   B->structurally_symmetric     = PETSC_TRUE;
1795   B->symmetric_set              = PETSC_TRUE;
1796   B->structurally_symmetric_set = PETSC_TRUE;
1797   PetscFunctionReturn(0);
1798 }
1799 EXTERN_C_END
1800 
1801 /*MC
1802    MATSBAIJ - MATSBAIJ = "sbaij" - A matrix type to be used for symmetric block sparse matrices.
1803 
1804    This matrix type is identical to MATSEQSBAIJ when constructed with a single process communicator,
1805    and MATMPISBAIJ otherwise.
1806 
1807    Options Database Keys:
1808 . -mat_type sbaij - sets the matrix type to "sbaij" during a call to MatSetFromOptions()
1809 
1810   Level: beginner
1811 
1812 .seealso: MatCreateMPISBAIJ,MATSEQSBAIJ,MATMPISBAIJ
1813 M*/
1814 
1815 EXTERN_C_BEGIN
1816 #undef __FUNCT__
1817 #define __FUNCT__ "MatCreate_SBAIJ"
1818 PetscErrorCode PETSCMAT_DLLEXPORT MatCreate_SBAIJ(Mat A)
1819 {
1820   PetscErrorCode ierr;
1821   PetscMPIInt    size;
1822 
1823   PetscFunctionBegin;
1824   ierr = PetscObjectChangeTypeName((PetscObject)A,MATSBAIJ);CHKERRQ(ierr);
1825   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
1826   if (size == 1) {
1827     ierr = MatSetType(A,MATSEQSBAIJ);CHKERRQ(ierr);
1828   } else {
1829     ierr = MatSetType(A,MATMPISBAIJ);CHKERRQ(ierr);
1830   }
1831   PetscFunctionReturn(0);
1832 }
1833 EXTERN_C_END
1834 
1835 #undef __FUNCT__
1836 #define __FUNCT__ "MatMPISBAIJSetPreallocation"
1837 /*@C
1838    MatMPISBAIJSetPreallocation - For good matrix assembly performance
1839    the user should preallocate the matrix storage by setting the parameters
1840    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1841    performance can be increased by more than a factor of 50.
1842 
1843    Collective on Mat
1844 
1845    Input Parameters:
1846 +  A - the matrix
1847 .  bs   - size of block
1848 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1849            submatrix  (same for all local rows)
1850 .  d_nnz - array containing the number of block nonzeros in the various block rows
1851            in the upper triangular and diagonal part of the diagonal portion of the local
1852            submatrix (possibly different for each block row) or PETSC_NULL.  You must leave
1853            room for the diagonal entry even if it is zero.
1854 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1855            submatrix (same for all local rows).
1856 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1857            off-diagonal portion of the local submatrix (possibly different for
1858            each block row) or PETSC_NULL.
1859 
1860 
1861    Options Database Keys:
1862 .   -mat_no_unroll - uses code that does not unroll the loops in the
1863                      block calculations (much slower)
1864 .   -mat_block_size - size of the blocks to use
1865 
1866    Notes:
1867 
1868    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1869    then it must be used on all processors that share the object for that argument.
1870 
1871    If the *_nnz parameter is given then the *_nz parameter is ignored
1872 
1873    Storage Information:
1874    For a square global matrix we define each processor's diagonal portion
1875    to be its local rows and the corresponding columns (a square submatrix);
1876    each processor's off-diagonal portion encompasses the remainder of the
1877    local matrix (a rectangular submatrix).
1878 
1879    The user can specify preallocated storage for the diagonal part of
1880    the local submatrix with either d_nz or d_nnz (not both).  Set
1881    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1882    memory allocation.  Likewise, specify preallocated storage for the
1883    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1884 
1885    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1886    the figure below we depict these three local rows and all columns (0-11).
1887 
1888 .vb
1889            0 1 2 3 4 5 6 7 8 9 10 11
1890           -------------------
1891    row 3  |  o o o d d d o o o o o o
1892    row 4  |  o o o d d d o o o o o o
1893    row 5  |  o o o d d d o o o o o o
1894           -------------------
1895 .ve
1896 
1897    Thus, any entries in the d locations are stored in the d (diagonal)
1898    submatrix, and any entries in the o locations are stored in the
1899    o (off-diagonal) submatrix.  Note that the d matrix is stored in
1900    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
1901 
1902    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
1903    plus the diagonal part of the d matrix,
1904    and o_nz should indicate the number of block nonzeros per row in the o matrix.
1905    In general, for PDE problems in which most nonzeros are near the diagonal,
1906    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
1907    or you will get TERRIBLE performance; see the users' manual chapter on
1908    matrices.
1909 
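   As a rough illustration (values chosen for the sketch only; A is assumed to already
   have its type and sizes set), either a single count per block row
.vb
      ierr = MatMPISBAIJSetPreallocation(A,bs,5,PETSC_NULL,2,PETSC_NULL);CHKERRQ(ierr);
.ve
   or per-block-row counts
.vb
      ierr = MatMPISBAIJSetPreallocation(A,bs,0,d_nnz,0,o_nnz);CHKERRQ(ierr);
.ve
   may be given; for this symmetric format only the upper triangular part of each
   block row (including the diagonal block) is counted in d_nz/d_nnz.
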
1910    Level: intermediate
1911 
1912 .keywords: matrix, block, aij, compressed row, sparse, parallel
1913 
1914 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
1915 @*/
1916 PetscErrorCode PETSCMAT_DLLEXPORT MatMPISBAIJSetPreallocation(Mat B,PetscInt bs,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[])
1917 {
1918   PetscErrorCode ierr,(*f)(Mat,PetscInt,PetscInt,const PetscInt[],PetscInt,const PetscInt[]);
1919 
1920   PetscFunctionBegin;
1921   ierr = PetscObjectQueryFunction((PetscObject)B,"MatMPISBAIJSetPreallocation_C",(void (**)(void))&f);CHKERRQ(ierr);
1922   if (f) {
1923     ierr = (*f)(B,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
1924   }
1925   PetscFunctionReturn(0);
1926 }
1927 
1928 #undef __FUNCT__
1929 #define __FUNCT__ "MatCreateMPISBAIJ"
1930 /*@C
1931    MatCreateMPISBAIJ - Creates a sparse parallel matrix in symmetric block AIJ format
1932    (block compressed row).  For good matrix assembly performance
1933    the user should preallocate the matrix storage by setting the parameters
1934    d_nz (or d_nnz) and o_nz (or o_nnz).  By setting these parameters accurately,
1935    performance can be increased by more than a factor of 50.
1936 
1937    Collective on MPI_Comm
1938 
1939    Input Parameters:
1940 +  comm - MPI communicator
1941 .  bs   - size of block
1942 .  m - number of local rows (or PETSC_DECIDE to have calculated if M is given)
1943            This value should be the same as the local size used in creating the
1944            y vector for the matrix-vector product y = Ax.
1945 .  n - number of local columns (or PETSC_DECIDE to have calculated if N is given)
1946            This value should be the same as the local size used in creating the
1947            x vector for the matrix-vector product y = Ax.
1948 .  M - number of global rows (or PETSC_DETERMINE to have calculated if m is given)
1949 .  N - number of global columns (or PETSC_DETERMINE to have calculated if n is given)
1950 .  d_nz  - number of block nonzeros per block row in diagonal portion of local
1951            submatrix  (same for all local rows)
1952 .  d_nnz - array containing the number of block nonzeros in the various block rows
1953            in the upper triangular portion of the diagonal portion of the local submatrix
1954            (possibly different for each block row) or PETSC_NULL.
1955            You must leave room for the diagonal entry even if it is zero.
1956 .  o_nz  - number of block nonzeros per block row in the off-diagonal portion of local
1957            submatrix (same for all local rows).
1958 -  o_nnz - array containing the number of block nonzeros in the various block rows of the
1959            off-diagonal portion of the local submatrix (possibly different for
1960            each block row) or PETSC_NULL.
1961 
1962    Output Parameter:
1963 .  A - the matrix
1964 
1965    Options Database Keys:
1966 .   -mat_no_unroll - uses code that does not unroll the loops in the
1967                      block calculations (much slower)
1968 .   -mat_block_size - size of the blocks to use
1969 .   -mat_mpi - use the parallel matrix data structures even on one processor
1970                (defaults to using SeqSBAIJ format on one processor)
1971 
1972    Notes:
1973    The number of rows and columns must be divisible by blocksize.
1974 
1975    The user MUST specify either the local or global matrix dimensions
1976    (possibly both).
1977 
1978    If PETSC_DECIDE or PETSC_DETERMINE is used for a particular argument on one processor
1979    then it must be used on all processors that share the object for that argument.
1980 
1981    If the *_nnz parameter is given then the *_nz parameter is ignored
1982 
1983    Storage Information:
1984    For a square global matrix we define each processor's diagonal portion
1985    to be its local rows and the corresponding columns (a square submatrix);
1986    each processor's off-diagonal portion encompasses the remainder of the
1987    local matrix (a rectangular submatrix).
1988 
1989    The user can specify preallocated storage for the diagonal part of
1990    the local submatrix with either d_nz or d_nnz (not both).  Set
1991    d_nz=PETSC_DEFAULT and d_nnz=PETSC_NULL for PETSc to control dynamic
1992    memory allocation.  Likewise, specify preallocated storage for the
1993    off-diagonal part of the local submatrix with o_nz or o_nnz (not both).
1994 
1995    Consider a processor that owns rows 3, 4 and 5 of a parallel matrix. In
1996    the figure below we depict these three local rows and all columns (0-11).
1997 
1998 .vb
1999            0 1 2 3 4 5 6 7 8 9 10 11
2000           -------------------
2001    row 3  |  o o o d d d o o o o o o
2002    row 4  |  o o o d d d o o o o o o
2003    row 5  |  o o o d d d o o o o o o
2004           -------------------
2005 .ve
2006 
2007    Thus, any entries in the d locations are stored in the d (diagonal)
2008    submatrix, and any entries in the o locations are stored in the
2009    o (off-diagonal) submatrix.  Note that the d matrix is stored in
2010    MATSEQSBAIJ format and the o submatrix in MATSEQBAIJ format.
2011 
2012    Now d_nz should indicate the number of block nonzeros per row in the upper triangular
2013    plus the diagonal part of the d matrix,
2014    and o_nz should indicate the number of block nonzeros per row in the o matrix.
2015    In general, for PDE problems in which most nonzeros are near the diagonal,
2016    one expects d_nz >> o_nz.   For large problems you MUST preallocate memory
2017    or you will get TERRIBLE performance; see the users' manual chapter on
2018    matrices.
2019 
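   A minimal creation sketch (the communicator, sizes, and preallocation counts below
   are illustrative assumptions only):
.vb
      Mat A;
      ierr = MatCreateMPISBAIJ(comm,2,PETSC_DECIDE,PETSC_DECIDE,100,100,
                               5,PETSC_NULL,2,PETSC_NULL,&A);CHKERRQ(ierr);
      /* set only upper triangular (block) entries, then assemble */
      ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
      ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
.ve
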
2020    Level: intermediate
2021 
2022 .keywords: matrix, block, aij, compressed row, sparse, parallel
2023 
2024 .seealso: MatCreate(), MatCreateSeqSBAIJ(), MatSetValues(), MatCreateMPIBAIJ()
2025 @*/
2026 
2027 PetscErrorCode PETSCMAT_DLLEXPORT MatCreateMPISBAIJ(MPI_Comm comm,PetscInt bs,PetscInt m,PetscInt n,PetscInt M,PetscInt N,PetscInt d_nz,const PetscInt d_nnz[],PetscInt o_nz,const PetscInt o_nnz[],Mat *A)
2028 {
2029   PetscErrorCode ierr;
2030   PetscMPIInt    size;
2031 
2032   PetscFunctionBegin;
2033   ierr = MatCreate(comm,A);CHKERRQ(ierr);
2034   ierr = MatSetSizes(*A,m,n,M,N);CHKERRQ(ierr);
2035   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2036   if (size > 1) {
2037     ierr = MatSetType(*A,MATMPISBAIJ);CHKERRQ(ierr);
2038     ierr = MatMPISBAIJSetPreallocation(*A,bs,d_nz,d_nnz,o_nz,o_nnz);CHKERRQ(ierr);
2039   } else {
2040     ierr = MatSetType(*A,MATSEQSBAIJ);CHKERRQ(ierr);
2041     ierr = MatSeqSBAIJSetPreallocation(*A,bs,d_nz,d_nnz);CHKERRQ(ierr);
2042   }
2043   PetscFunctionReturn(0);
2044 }
2045 
2046 
2047 #undef __FUNCT__
2048 #define __FUNCT__ "MatDuplicate_MPISBAIJ"
2049 static PetscErrorCode MatDuplicate_MPISBAIJ(Mat matin,MatDuplicateOption cpvalues,Mat *newmat)
2050 {
2051   Mat            mat;
2052   Mat_MPISBAIJ   *a,*oldmat = (Mat_MPISBAIJ*)matin->data;
2053   PetscErrorCode ierr;
2054   PetscInt       len=0,nt,bs=matin->rmap.bs,mbs=oldmat->mbs;
2055   PetscScalar    *array;
2056 
2057   PetscFunctionBegin;
2058   *newmat       = 0;
2059   ierr = MatCreate(matin->comm,&mat);CHKERRQ(ierr);
2060   ierr = MatSetSizes(mat,matin->rmap.n,matin->cmap.n,matin->rmap.N,matin->cmap.N);CHKERRQ(ierr);
2061   ierr = MatSetType(mat,matin->type_name);CHKERRQ(ierr);
2062   ierr = PetscMemcpy(mat->ops,matin->ops,sizeof(struct _MatOps));CHKERRQ(ierr);
2063   ierr = PetscMapCopy(matin->comm,&matin->rmap,&mat->rmap);CHKERRQ(ierr);
2064   ierr = PetscMapCopy(matin->comm,&matin->cmap,&mat->cmap);CHKERRQ(ierr);
2065 
2066   mat->factor       = matin->factor;
2067   mat->preallocated = PETSC_TRUE;
2068   mat->assembled    = PETSC_TRUE;
2069   mat->insertmode   = NOT_SET_VALUES;
2070 
2071   a = (Mat_MPISBAIJ*)mat->data;
2072   a->bs2   = oldmat->bs2;
2073   a->mbs   = oldmat->mbs;
2074   a->nbs   = oldmat->nbs;
2075   a->Mbs   = oldmat->Mbs;
2076   a->Nbs   = oldmat->Nbs;
2077 
2078 
2079   a->size         = oldmat->size;
2080   a->rank         = oldmat->rank;
2081   a->donotstash   = oldmat->donotstash;
2082   a->roworiented  = oldmat->roworiented;
2083   a->rowindices   = 0;
2084   a->rowvalues    = 0;
2085   a->getrowactive = PETSC_FALSE;
2086   a->barray       = 0;
2087   a->rstartbs    = oldmat->rstartbs;
2088   a->rendbs      = oldmat->rendbs;
2089   a->cstartbs    = oldmat->cstartbs;
2090   a->cendbs      = oldmat->cendbs;
2091 
2092   /* hash table stuff */
2093   a->ht           = 0;
2094   a->hd           = 0;
2095   a->ht_size      = 0;
2096   a->ht_flag      = oldmat->ht_flag;
2097   a->ht_fact      = oldmat->ht_fact;
2098   a->ht_total_ct  = 0;
2099   a->ht_insert_ct = 0;
2100 
2101   ierr = PetscMemcpy(a->rangebs,oldmat->rangebs,(a->size+2)*sizeof(PetscInt));CHKERRQ(ierr);
2102   ierr = MatStashCreate_Private(matin->comm,1,&mat->stash);CHKERRQ(ierr);
2103   ierr = MatStashCreate_Private(matin->comm,matin->rmap.bs,&mat->bstash);CHKERRQ(ierr);
2104   if (oldmat->colmap) {
2105 #if defined (PETSC_USE_CTABLE)
2106     ierr = PetscTableCreateCopy(oldmat->colmap,&a->colmap);CHKERRQ(ierr);
2107 #else
2108     ierr = PetscMalloc((a->Nbs)*sizeof(PetscInt),&a->colmap);CHKERRQ(ierr);
2109     ierr = PetscLogObjectMemory(mat,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2110     ierr = PetscMemcpy(a->colmap,oldmat->colmap,(a->Nbs)*sizeof(PetscInt));CHKERRQ(ierr);
2111 #endif
2112   } else a->colmap = 0;
2113 
2114   if (oldmat->garray && (len = ((Mat_SeqBAIJ*)(oldmat->B->data))->nbs)) {
2115     ierr = PetscMalloc(len*sizeof(PetscInt),&a->garray);CHKERRQ(ierr);
2116     ierr = PetscLogObjectMemory(mat,len*sizeof(PetscInt));CHKERRQ(ierr);
2117     ierr = PetscMemcpy(a->garray,oldmat->garray,len*sizeof(PetscInt));CHKERRQ(ierr);
2118   } else a->garray = 0;
2119 
2120   ierr =  VecDuplicate(oldmat->lvec,&a->lvec);CHKERRQ(ierr);
2121   ierr = PetscLogObjectParent(mat,a->lvec);CHKERRQ(ierr);
2122   ierr =  VecScatterCopy(oldmat->Mvctx,&a->Mvctx);CHKERRQ(ierr);
2123   ierr = PetscLogObjectParent(mat,a->Mvctx);CHKERRQ(ierr);
2124 
2125   ierr =  VecDuplicate(oldmat->slvec0,&a->slvec0);CHKERRQ(ierr);
2126   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2127   ierr =  VecDuplicate(oldmat->slvec1,&a->slvec1);CHKERRQ(ierr);
2128   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2129 
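  /* slvec1 holds the local part (first bs*mbs entries) followed by the ghost part;
     slvec1a/slvec1b (and slvec0b below) are sub-vectors that share that storage */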
2130   ierr = VecGetLocalSize(a->slvec1,&nt);CHKERRQ(ierr);
2131   ierr = VecGetArray(a->slvec1,&array);CHKERRQ(ierr);
2132   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,bs*mbs,array,&a->slvec1a);CHKERRQ(ierr);
2133   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec1b);CHKERRQ(ierr);
2134   ierr = VecRestoreArray(a->slvec1,&array);CHKERRQ(ierr);
2135   ierr = VecGetArray(a->slvec0,&array);CHKERRQ(ierr);
2136   ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,nt-bs*mbs,array+bs*mbs,&a->slvec0b);CHKERRQ(ierr);
2137   ierr = VecRestoreArray(a->slvec0,&array);CHKERRQ(ierr);
2138   ierr = PetscLogObjectParent(mat,a->slvec0);CHKERRQ(ierr);
2139   ierr = PetscLogObjectParent(mat,a->slvec1);CHKERRQ(ierr);
2140   ierr = PetscLogObjectParent(mat,a->slvec0b);CHKERRQ(ierr);
2141   ierr = PetscLogObjectParent(mat,a->slvec1a);CHKERRQ(ierr);
2142   ierr = PetscLogObjectParent(mat,a->slvec1b);CHKERRQ(ierr);
2143 
2144   /* ierr =  VecScatterCopy(oldmat->sMvctx,&a->sMvctx); - not written yet, replaced by the lazy trick: */
2145   ierr = PetscObjectReference((PetscObject)oldmat->sMvctx);CHKERRQ(ierr);
2146   a->sMvctx = oldmat->sMvctx;
2147   ierr = PetscLogObjectParent(mat,a->sMvctx);CHKERRQ(ierr);
2148 
2149   ierr =  MatDuplicate(oldmat->A,cpvalues,&a->A);CHKERRQ(ierr);
2150   ierr = PetscLogObjectParent(mat,a->A);CHKERRQ(ierr);
2151   ierr =  MatDuplicate(oldmat->B,cpvalues,&a->B);CHKERRQ(ierr);
2152   ierr = PetscLogObjectParent(mat,a->B);CHKERRQ(ierr);
2153   ierr = PetscFListDuplicate(matin->qlist,&mat->qlist);CHKERRQ(ierr);
2154   *newmat = mat;
2155   PetscFunctionReturn(0);
2156 }
2157 
2158 #include "petscsys.h"
2159 
2160 #undef __FUNCT__
2161 #define __FUNCT__ "MatLoad_MPISBAIJ"
2162 PetscErrorCode MatLoad_MPISBAIJ(PetscViewer viewer, MatType type,Mat *newmat)
2163 {
2164   Mat            A;
2165   PetscErrorCode ierr;
2166   PetscInt       i,nz,j,rstart,rend;
2167   PetscScalar    *vals,*buf;
2168   MPI_Comm       comm = ((PetscObject)viewer)->comm;
2169   MPI_Status     status;
2170   PetscMPIInt    rank,size,tag = ((PetscObject)viewer)->tag,*sndcounts = 0,*browners,maxnz,*rowners,*locrowlens;
2171   PetscInt       header[4],*rowlengths = 0,M,N,m,*cols;
2172   PetscInt       *procsnz = 0,jj,*mycols,*ibuf;
2173   PetscInt       bs=1,Mbs,mbs,extra_rows;
2174   PetscInt       *dlens,*odlens,*mask,*masked1,*masked2,rowcount,odcount;
2175   PetscInt       dcount,kmax,k,nzcount,tmp;
2176   int            fd;
2177 
2178   PetscFunctionBegin;
2179   ierr = PetscOptionsGetInt(PETSC_NULL,"-matload_block_size",&bs,PETSC_NULL);CHKERRQ(ierr);
2180 
2181   ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr);
2182   ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
2183   if (!rank) {
2184     ierr = PetscViewerBinaryGetDescriptor(viewer,&fd);CHKERRQ(ierr);
2185     ierr = PetscBinaryRead(fd,(char *)header,4,PETSC_INT);CHKERRQ(ierr);
2186     if (header[0] != MAT_FILE_COOKIE) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"not matrix object");
2187     if (header[3] < 0) {
2188       SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"Matrix stored in special format, cannot load as MPISBAIJ");
2189     }
2190   }
2191 
2192   ierr = MPI_Bcast(header+1,3,MPIU_INT,0,comm);CHKERRQ(ierr);
2193   M = header[1]; N = header[2];
2194 
2195   if (M != N) SETERRQ(PETSC_ERR_SUP,"Can only do square matrices");
2196 
2197   /*
2198      This code adds extra rows to make sure the number of rows is
2199      divisible by the blocksize
2200   */
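  /* Illustrative arithmetic: with M = 10 and bs = 4, Mbs = 10/4 = 2 and
     extra_rows = 4 - 10 + 8 = 2, so Mbs is bumped to 3 and two padded rows
     (with a unit diagonal entry) are appended below. */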
2201   Mbs        = M/bs;
2202   extra_rows = bs - M + bs*(Mbs);
2203   if (extra_rows == bs) extra_rows = 0;
2204   else                  Mbs++;
2205   if (extra_rows && !rank) {
2206     ierr = PetscInfo(0,"Padding loaded matrix to match blocksize\n");CHKERRQ(ierr);
2207   }
2208 
2209   /* determine ownership of all rows */
2210   mbs        = Mbs/size + ((Mbs % size) > rank);
2211   m          = mbs*bs;
2212   ierr       = PetscMalloc(2*(size+2)*sizeof(PetscMPIInt),&rowners);CHKERRQ(ierr);
2213   browners   = rowners + size + 1;
2214   ierr       = MPI_Allgather(&mbs,1,MPI_INT,rowners+1,1,MPI_INT,comm);CHKERRQ(ierr);
2215   rowners[0] = 0;
2216   for (i=2; i<=size; i++) rowners[i] += rowners[i-1];
2217   for (i=0; i<=size;  i++) browners[i] = rowners[i]*bs;
2218   rstart = rowners[rank];
2219   rend   = rowners[rank+1];
2220 
2221   /* distribute row lengths to all processors */
2222   ierr = PetscMalloc((rend-rstart)*bs*sizeof(PetscMPIInt),&locrowlens);CHKERRQ(ierr);
2223   if (!rank) {
2224     ierr = PetscMalloc((M+extra_rows)*sizeof(PetscInt),&rowlengths);CHKERRQ(ierr);
2225     ierr = PetscBinaryRead(fd,rowlengths,M,PETSC_INT);CHKERRQ(ierr);
2226     for (i=0; i<extra_rows; i++) rowlengths[M+i] = 1;
2227     ierr = PetscMalloc(size*sizeof(PetscMPIInt),&sndcounts);CHKERRQ(ierr);
2228     for (i=0; i<size; i++) sndcounts[i] = browners[i+1] - browners[i];
2229     ierr = MPI_Scatterv(rowlengths,sndcounts,browners,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2230     ierr = PetscFree(sndcounts);CHKERRQ(ierr);
2231   } else {
2232     ierr = MPI_Scatterv(0,0,0,MPIU_INT,locrowlens,(rend-rstart)*bs,MPIU_INT,0,comm);CHKERRQ(ierr);
2233   }
2234 
2235   if (!rank) {   /* procs[0] */
2236     /* calculate the number of nonzeros on each processor */
2237     ierr = PetscMalloc(size*sizeof(PetscInt),&procsnz);CHKERRQ(ierr);
2238     ierr = PetscMemzero(procsnz,size*sizeof(PetscInt));CHKERRQ(ierr);
2239     for (i=0; i<size; i++) {
2240       for (j=rowners[i]*bs; j< rowners[i+1]*bs; j++) {
2241         procsnz[i] += rowlengths[j];
2242       }
2243     }
2244     ierr = PetscFree(rowlengths);CHKERRQ(ierr);
2245 
2246     /* determine max buffer needed and allocate it */
2247     maxnz = 0;
2248     for (i=0; i<size; i++) {
2249       maxnz = PetscMax(maxnz,procsnz[i]);
2250     }
2251     ierr = PetscMalloc(maxnz*sizeof(PetscInt),&cols);CHKERRQ(ierr);
2252 
2253     /* read in my part of the matrix column indices  */
2254     nz     = procsnz[0];
2255     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2256     mycols = ibuf;
2257     if (size == 1)  nz -= extra_rows;
2258     ierr = PetscBinaryRead(fd,mycols,nz,PETSC_INT);CHKERRQ(ierr);
2259     if (size == 1)  for (i=0; i< extra_rows; i++) { mycols[nz+i] = M+i; }
2260 
2261     /* read in the column indices for the other procs (except the last) and ship them off */
2262     for (i=1; i<size-1; i++) {
2263       nz   = procsnz[i];
2264       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2265       ierr = MPI_Send(cols,nz,MPIU_INT,i,tag,comm);CHKERRQ(ierr);
2266     }
2267     /* read in the column indices for the last proc */
2268     if (size != 1) {
2269       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2270       ierr = PetscBinaryRead(fd,cols,nz,PETSC_INT);CHKERRQ(ierr);
2271       for (i=0; i<extra_rows; i++) cols[nz+i] = M+i;
2272       ierr = MPI_Send(cols,nz+extra_rows,MPIU_INT,size-1,tag,comm);CHKERRQ(ierr);
2273     }
2274     ierr = PetscFree(cols);CHKERRQ(ierr);
2275   } else {  /* procs[i], i>0 */
2276     /* determine buffer space needed for message */
2277     nz = 0;
2278     for (i=0; i<m; i++) {
2279       nz += locrowlens[i];
2280     }
2281     ierr   = PetscMalloc(nz*sizeof(PetscInt),&ibuf);CHKERRQ(ierr);
2282     mycols = ibuf;
2283     /* receive message of column indices */
2284     ierr = MPI_Recv(mycols,nz,MPIU_INT,0,tag,comm,&status);CHKERRQ(ierr);
2285     ierr = MPI_Get_count(&status,MPIU_INT,&maxnz);CHKERRQ(ierr);
2286     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2287   }
2288 
2289   /* loop over local rows, determining number of off diagonal entries */
2290   ierr     = PetscMalloc(2*(rend-rstart+1)*sizeof(PetscInt),&dlens);CHKERRQ(ierr);
2291   odlens   = dlens + (rend-rstart);
2292   ierr     = PetscMalloc(3*Mbs*sizeof(PetscInt),&mask);CHKERRQ(ierr);
2293   ierr     = PetscMemzero(mask,3*Mbs*sizeof(PetscInt));CHKERRQ(ierr);
2294   masked1  = mask    + Mbs;
2295   masked2  = masked1 + Mbs;
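  /* mask[] flags block columns already counted in the current block row;
     masked1[]/masked2[] record which flags to clear before the next block row */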
2296   rowcount = 0; nzcount = 0;
2297   for (i=0; i<mbs; i++) {
2298     dcount  = 0;
2299     odcount = 0;
2300     for (j=0; j<bs; j++) {
2301       kmax = locrowlens[rowcount];
2302       for (k=0; k<kmax; k++) {
2303         tmp = mycols[nzcount++]/bs; /* block col. index */
2304         if (!mask[tmp]) {
2305           mask[tmp] = 1;
2306           if (tmp < rstart || tmp >= rend) masked2[odcount++] = tmp; /* entry in off-diag portion */
2307           else masked1[dcount++] = tmp; /* entry in diag portion */
2308         }
2309       }
2310       rowcount++;
2311     }
2312 
2313     dlens[i]  = dcount;  /* d_nzz[i] */
2314     odlens[i] = odcount; /* o_nzz[i] */
2315 
2316     /* zero out the mask elements we set */
2317     for (j=0; j<dcount; j++) mask[masked1[j]] = 0;
2318     for (j=0; j<odcount; j++) mask[masked2[j]] = 0;
2319   }
2320 
2321   /* create our matrix */
2322   ierr = MatCreate(comm,&A);CHKERRQ(ierr);
2323   ierr = MatSetSizes(A,m,m,PETSC_DETERMINE,PETSC_DETERMINE);CHKERRQ(ierr);
2324   ierr = MatSetType(A,type);CHKERRQ(ierr);
2325   ierr = MatMPISBAIJSetPreallocation(A,bs,0,dlens,0,odlens);CHKERRQ(ierr);
2326   ierr = MatSetOption(A,MAT_COLUMNS_SORTED);CHKERRQ(ierr);
2327 
2328   if (!rank) {
2329     ierr = PetscMalloc(maxnz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2330     /* read in my part of the matrix numerical values  */
2331     nz = procsnz[0];
2332     vals = buf;
2333     mycols = ibuf;
2334     if (size == 1)  nz -= extra_rows;
2335     ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2336     if (size == 1)  for (i=0; i< extra_rows; i++) { vals[nz+i] = 1.0; }
2337 
2338     /* insert into matrix */
2339     jj      = rstart*bs;
2340     for (i=0; i<m; i++) {
2341       ierr = MatSetValues(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2342       mycols += locrowlens[i];
2343       vals   += locrowlens[i];
2344       jj++;
2345     }
2346 
2347     /* read in other processors (except the last one) and ship out */
2348     for (i=1; i<size-1; i++) {
2349       nz   = procsnz[i];
2350       vals = buf;
2351       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2352       ierr = MPI_Send(vals,nz,MPIU_SCALAR,i,A->tag,comm);CHKERRQ(ierr);
2353     }
2354     /* the last proc */
2355     if (size != 1){
2356       nz   = procsnz[size-1] - extra_rows;  /* the extra rows are not on the disk */
2357       vals = buf;
2358       ierr = PetscBinaryRead(fd,vals,nz,PETSC_SCALAR);CHKERRQ(ierr);
2359       for (i=0; i<extra_rows; i++) vals[nz+i] = 1.0;
2360       ierr = MPI_Send(vals,nz+extra_rows,MPIU_SCALAR,size-1,A->tag,comm);CHKERRQ(ierr);
2361     }
2362     ierr = PetscFree(procsnz);CHKERRQ(ierr);
2363 
2364   } else {
2365     /* receive numeric values */
2366     ierr = PetscMalloc(nz*sizeof(PetscScalar),&buf);CHKERRQ(ierr);
2367 
2368     /* receive message of values */
2369     vals   = buf;
2370     mycols = ibuf;
2371     ierr   = MPI_Recv(vals,nz,MPIU_SCALAR,0,A->tag,comm,&status);CHKERRQ(ierr);
2372     ierr   = MPI_Get_count(&status,MPIU_SCALAR,&maxnz);CHKERRQ(ierr);
2373     if (maxnz != nz) SETERRQ(PETSC_ERR_FILE_UNEXPECTED,"something is wrong with the file");
2374 
2375     /* insert into matrix */
2376     jj      = rstart*bs;
2377     for (i=0; i<m; i++) {
2378       ierr    = MatSetValues_MPISBAIJ(A,1,&jj,locrowlens[i],mycols,vals,INSERT_VALUES);CHKERRQ(ierr);
2379       mycols += locrowlens[i];
2380       vals   += locrowlens[i];
2381       jj++;
2382     }
2383   }
2384 
2385   ierr = PetscFree(locrowlens);CHKERRQ(ierr);
2386   ierr = PetscFree(buf);CHKERRQ(ierr);
2387   ierr = PetscFree(ibuf);CHKERRQ(ierr);
2388   ierr = PetscFree(rowners);CHKERRQ(ierr);
2389   ierr = PetscFree(dlens);CHKERRQ(ierr);
2390   ierr = PetscFree(mask);CHKERRQ(ierr);
2391   ierr = MatAssemblyBegin(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2392   ierr = MatAssemblyEnd(A,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr);
2393   *newmat = A;
2394   PetscFunctionReturn(0);
2395 }
2396 
2397 #undef __FUNCT__
2398 #define __FUNCT__ "MatMPISBAIJSetHashTableFactor"
2399 /*XXXXX@
2400    MatMPISBAIJSetHashTableFactor - Sets the factor required to compute the size of the HashTable.
2401 
2402    Input Parameters:
2403 .  mat  - the matrix
2404 .  fact - factor
2405 
2406    Collective on Mat
2407 
2408    Level: advanced
2409 
2410   Notes:
2411    This can also be set by the command line option: -mat_use_hash_table fact
2412 
2413 .keywords: matrix, hashtable, factor, HT
2414 
2415 .seealso: MatSetOption()
2416 @XXXXX*/
2417 
2418 
2419 #undef __FUNCT__
2420 #define __FUNCT__ "MatGetRowMax_MPISBAIJ"
2421 PetscErrorCode MatGetRowMax_MPISBAIJ(Mat A,Vec v)
2422 {
2423   Mat_MPISBAIJ   *a = (Mat_MPISBAIJ*)A->data;
2424   Mat_SeqBAIJ    *b = (Mat_SeqBAIJ*)(a->B)->data;
2425   PetscReal      atmp;
2426   PetscReal      *work,*svalues,*rvalues;
2427   PetscErrorCode ierr;
2428   PetscInt       i,bs,mbs,*bi,*bj,brow,j,ncols,krow,kcol,col,row,Mbs,bcol;
2429   PetscMPIInt    rank,size;
2430   PetscInt       *rowners_bs,dest,count,source;
2431   PetscScalar    *va;
2432   MatScalar      *ba;
2433   MPI_Status     stat;
2434 
2435   PetscFunctionBegin;
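  /* Scheme: the diagonal block is handled by MatGetRowMax() on a->A below.  The
     off-diagonal block B stores only the upper triangular coupling (columns owned by
     higher-ranked processes), so the absolute value of each stored entry updates the
     local row maximum and, by symmetry, a work[] entry for the owner of its column;
     slices of work[] are then sent to those higher-ranked processes, which fold the
     received values into their own result. */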
2436   ierr = MatGetRowMax(a->A,v);CHKERRQ(ierr);
2437   ierr = VecGetArray(v,&va);CHKERRQ(ierr);
2438 
2439   ierr = MPI_Comm_size(A->comm,&size);CHKERRQ(ierr);
2440   ierr = MPI_Comm_rank(A->comm,&rank);CHKERRQ(ierr);
2441 
2442   bs   = A->rmap.bs;
2443   mbs  = a->mbs;
2444   Mbs  = a->Mbs;
2445   ba   = b->a;
2446   bi   = b->i;
2447   bj   = b->j;
2448 
2449   /* find ownerships */
2450   rowners_bs = A->rmap.range;
2451 
2452   /* each proc creates an array to be distributed */
2453   ierr = PetscMalloc(bs*Mbs*sizeof(PetscReal),&work);CHKERRQ(ierr);
2454   ierr = PetscMemzero(work,bs*Mbs*sizeof(PetscReal));CHKERRQ(ierr);
2455 
2456   /* row_max for B */
2457   if (rank != size-1){
2458     for (i=0; i<mbs; i++) {
2459       ncols = bi[1] - bi[0]; bi++;
2460       brow  = bs*i;
2461       for (j=0; j<ncols; j++){
2462         bcol = bs*(*bj);
2463         for (kcol=0; kcol<bs; kcol++){
2464           col = bcol + kcol;                 /* local col index */
2465           col += rowners_bs[rank+1];      /* global col index */
2466           for (krow=0; krow<bs; krow++){
2467             atmp = PetscAbsScalar(*ba); ba++;
2468             row = brow + krow;    /* local row index */
2469             if (PetscRealPart(va[row]) < atmp) va[row] = atmp;
2470             if (work[col] < atmp) work[col] = atmp;
2471           }
2472         }
2473         bj++;
2474       }
2475     }
2476 
2477     /* send values to its owners */
2478     for (dest=rank+1; dest<size; dest++){
2479       svalues = work + rowners_bs[dest];
2480       count   = rowners_bs[dest+1]-rowners_bs[dest];
2481       ierr    = MPI_Send(svalues,count,MPIU_REAL,dest,rank,A->comm);CHKERRQ(ierr);
2482     }
2483   }
2484 
2485   /* receive values */
2486   if (rank){
2487     rvalues = work;
2488     count   = rowners_bs[rank+1]-rowners_bs[rank];
2489     for (source=0; source<rank; source++){
2490       ierr = MPI_Recv(rvalues,count,MPIU_REAL,MPI_ANY_SOURCE,MPI_ANY_TAG,A->comm,&stat);CHKERRQ(ierr);
2491       /* process values */
2492       for (i=0; i<count; i++){
2493         if (PetscRealPart(va[i]) < rvalues[i]) va[i] = rvalues[i];
2494       }
2495     }
2496   }
2497 
2498   ierr = VecRestoreArray(v,&va);CHKERRQ(ierr);
2499   ierr = PetscFree(work);CHKERRQ(ierr);
2500   PetscFunctionReturn(0);
2501 }
2502 
2503 #undef __FUNCT__
2504 #define __FUNCT__ "MatRelax_MPISBAIJ"
2505 PetscErrorCode MatRelax_MPISBAIJ(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2506 {
2507   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2508   PetscErrorCode ierr;
2509   PetscInt       mbs=mat->mbs,bs=matin->rmap.bs;
2510   PetscScalar    *x,*b,*ptr,zero=0.0;
2511   Vec            bb1;
2512 
2513   PetscFunctionBegin;
2514   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2515   if (bs > 1)
2516     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2517 
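  /* Only the upper triangular off-processor coupling B is stored; the action of the
     symmetric lower part is obtained through B^T.  Roughly, each iteration forms
     slvec0 = [-x; -B^T x], scatter-adds it into slvec1 = [bb; 0] (so slvec1a picks up
     the lower coupling and slvec1b the needed ghost values of -x), computes
     bb1 = B*slvec1b + slvec1a, i.e. bb minus all off-processor coupling applied to x,
     and then does a local symmetric sweep on the diagonal block A. */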
2518   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2519     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2520       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2521       its--;
2522     }
2523 
2524     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2525     while (its--){
2526 
2527       /* lower triangular part: slvec0b = - B^T*xx */
2528       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,mat->slvec0b);CHKERRQ(ierr);
2529 
2530       /* copy xx into slvec0a */
2531       ierr = VecGetArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2532       ierr = VecGetArray(xx,&x);CHKERRQ(ierr);
2533       ierr = PetscMemcpy(ptr,x,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2534       ierr = VecRestoreArray(mat->slvec0,&ptr);CHKERRQ(ierr);
2535 
2536       ierr = VecScale(mat->slvec0,-1.0);CHKERRQ(ierr);
2537 
2538       /* copy bb into slvec1a */
2539       ierr = VecGetArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2540       ierr = VecGetArray(bb,&b);CHKERRQ(ierr);
2541       ierr = PetscMemcpy(ptr,b,bs*mbs*sizeof(PetscScalar));CHKERRQ(ierr);
2542       ierr = VecRestoreArray(mat->slvec1,&ptr);CHKERRQ(ierr);
2543 
2544       /* set slvec1b = 0 */
2545       ierr = VecSet(mat->slvec1b,zero);CHKERRQ(ierr);
2546 
2547       ierr = VecScatterBegin(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2548       ierr = VecRestoreArray(xx,&x);CHKERRQ(ierr);
2549       ierr = VecRestoreArray(bb,&b);CHKERRQ(ierr);
2550       ierr = VecScatterEnd(mat->slvec0,mat->slvec1,ADD_VALUES,SCATTER_FORWARD,mat->sMvctx);CHKERRQ(ierr);
2551 
2552       /* upper triangular part: bb1 = bb1 - B*x */
2553       ierr = (*mat->B->ops->multadd)(mat->B,mat->slvec1b,mat->slvec1a,bb1);CHKERRQ(ierr);
2554 
2555       /* local diagonal sweep */
2556       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2557     }
2558     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2559   } else {
2560     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2561   }
2562   PetscFunctionReturn(0);
2563 }
2564 
2565 #undef __FUNCT__
2566 #define __FUNCT__ "MatRelax_MPISBAIJ_2comm"
2567 PetscErrorCode MatRelax_MPISBAIJ_2comm(Mat matin,Vec bb,PetscReal omega,MatSORType flag,PetscReal fshift,PetscInt its,PetscInt lits,Vec xx)
2568 {
2569   Mat_MPISBAIJ   *mat = (Mat_MPISBAIJ*)matin->data;
2570   PetscErrorCode ierr;
2571   Vec            lvec1,bb1;
2572 
2573   PetscFunctionBegin;
2574   if (its <= 0 || lits <= 0) SETERRQ2(PETSC_ERR_ARG_WRONG,"Relaxation requires global its %D and local its %D both positive",its,lits);
2575   if (matin->rmap.bs > 1)
2576     SETERRQ(PETSC_ERR_SUP,"SSOR for block size > 1 is not yet implemented");
2577 
2578   if ((flag & SOR_LOCAL_SYMMETRIC_SWEEP) == SOR_LOCAL_SYMMETRIC_SWEEP){
2579     if ( flag & SOR_ZERO_INITIAL_GUESS ) {
2580       ierr = (*mat->A->ops->relax)(mat->A,bb,omega,flag,fshift,lits,lits,xx);CHKERRQ(ierr);
2581       its--;
2582     }
2583 
2584     ierr = VecDuplicate(mat->lvec,&lvec1);CHKERRQ(ierr);
2585     ierr = VecDuplicate(bb,&bb1);CHKERRQ(ierr);
2586     while (its--){
2587       ierr = VecScatterBegin(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2588 
2589       /* lower diagonal part: bb1 = bb - B^T*xx */
2590       ierr = (*mat->B->ops->multtranspose)(mat->B,xx,lvec1);CHKERRQ(ierr);
2591       ierr = VecScale(lvec1,-1.0);CHKERRQ(ierr);
2592 
2593       ierr = VecScatterEnd(xx,mat->lvec,INSERT_VALUES,SCATTER_FORWARD,mat->Mvctx);CHKERRQ(ierr);
2594       ierr = VecCopy(bb,bb1);CHKERRQ(ierr);
2595       ierr = VecScatterBegin(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2596 
2597       /* upper diagonal part: bb1 = bb1 - B*x */
2598       ierr = VecScale(mat->lvec,-1.0);CHKERRQ(ierr);
2599       ierr = (*mat->B->ops->multadd)(mat->B,mat->lvec,bb1,bb1);CHKERRQ(ierr);
2600 
2601       ierr = VecScatterEnd(lvec1,bb1,ADD_VALUES,SCATTER_REVERSE,mat->Mvctx);CHKERRQ(ierr);
2602 
2603       /* diagonal sweep */
2604       ierr = (*mat->A->ops->relax)(mat->A,bb1,omega,SOR_SYMMETRIC_SWEEP,fshift,lits,lits,xx);CHKERRQ(ierr);
2605     }
2606     ierr = VecDestroy(lvec1);CHKERRQ(ierr);
2607     ierr = VecDestroy(bb1);CHKERRQ(ierr);
2608   } else {
2609     SETERRQ(PETSC_ERR_SUP,"MatSORType is not supported for SBAIJ matrix format");
2610   }
2611   PetscFunctionReturn(0);
2612 }
2613 
2614